import fs from 'fs';
import path from 'path';

/**
 * Page information structure
 */
export interface PageInfo {
  /** Path of the page file on disk, as joined from the scanned directory. */
  filePath: string;
  /** Route without leading or trailing slash; `''` is the home page. */
  route: string;
  /** Root-relative URL, i.e. `/` followed by `route`. */
  url: string;
  title: string;
  priority: number;
  changeFrequency: 'always' | 'hourly' | 'daily' | 'weekly' | 'monthly' | 'yearly' | 'never';
  lastModified: Date;
  isDynamic: boolean;
  dynamicParams?: string[];
  contentType: 'page' | 'layout' | 'api';
}

/**
 * Dynamic route information
 */
export interface DynamicRouteInfo {
  pattern: string;
  paramName: string;
  example: string;
}

/**
 * SEO analysis results
 */
export interface SEOAnalysis {
  totalPages: number;
  /** Number of links that resolved to a known page. */
  internalLinks: number;
  /**
   * Same value as `internalLinks`. The analysis has always returned this
   * property, so it is declared here to keep the returned object type-correct.
   */
  totalLinks?: number;
  orphanedPages: string[];
  brokenLinks: string[];
  pagesWithIssues: {
    url: string;
    issues: string[];
  }[];
  averageLinkDensity: number;
  pageTypes: {
    [key: string]: number;
  };
}

/** Sitemap change-frequency values, derived from {@link PageInfo}. */
type ChangeFrequency = PageInfo['changeFrequency'];

/** A link found in page content. */
interface ExtractedLink {
  href: string;
  text: string;
}

/** Directory names that are never descended into while scanning. */
const SKIPPED_DIRECTORIES: ReadonlySet<string> = new Set(['api', 'lib', 'components', 'hooks']);

/** File names that define a page. */
const PAGE_FILE_PATTERN = /^page\.(tsx|ts|jsx|js)$/;

/** Route-group folders such as `(marketing)`, which Next.js omits from the URL. */
const ROUTE_GROUP_PATTERN = /^\(.+\)$/;

/** Base priorities for well-known top-level routes. */
const ROUTE_PRIORITIES: ReadonlyMap<string, number> = new Map([
  ['', 1.0], // Home page
  ['about-us', 0.9],
  ['contact-us', 0.9],
  ['services', 0.9],
  ['vending-machines', 0.9],
  ['manuals', 0.8],
]);

/** Patterns tried in order to find a page title inside a page's source text. */
const TITLE_PATTERNS: readonly RegExp[] = [
  // `title:` inside the default-exported component function
  /export\s+default\s+function\s+\w*\s*\([^)]*\)\s*{[^}]*title:\s*['"`]([^'"`]+)['"`]/,
  // `title:` inside generateMetadata()
  /export\s+async\s+function\s+generateMetadata\(\)\s*{[^}]*title:\s*['"`]([^'"`]+)['"`]/,
  // `title:` inside a static `export const metadata = { ... }` object
  /export\s+const\s+metadata\b[^=]*=\s*{[^}]*?\btitle:\s*['"`]([^'"`]+)['"`]/,
];

/** Matches `<a ... href="...">text</a>` and `<Link ... href="...">text</Link>`. */
const LINK_PATTERN =
  /<(?:a|Link)\b[^>]*?\shref=(?:"([^"]+)"|'([^']+)')[^>]*>([^<]+)<\/(?:a|Link)>/g;

/** Hrefs that only carry a fragment or query and therefore stay on the current page. */
const SAME_PAGE_HREF = /^[#?]/;

/** Hrefs with a URI scheme (`https:`, `mailto:`, `tel:`, ...) or protocol-relative (`//host`). */
const EXTERNAL_HREF = /^(?:[a-z][a-z0-9+.-]*:|\/\/)/i;

/** Link count above which a page is reported as having too many internal links. */
const MAX_INTERNAL_LINKS = 10;

/**
 * Discover all React pages in the app directory
 *
 * @param appDir - Directory to scan recursively for `page.{tsx,ts,jsx,js}` files.
 * @returns One entry per page file, sorted by route.
 * @throws Error when `appDir` does not exist or is not a directory.
 */
export function discoverPages(appDir: string): PageInfo[] {
  const pages: PageInfo[] = [];

  if (!fs.existsSync(appDir) || !fs.statSync(appDir).isDirectory()) {
    throw new Error(`App directory not found: ${appDir}`);
  }

  scanDirectory(appDir, '', pages);

  return pages.sort((a, b) => a.route.localeCompare(b.route));
}

/**
 * Scan directory recursively for page files
 *
 * @param dir - Directory currently being read.
 * @param relativePath - Path of `dir` relative to the scan root (`''` for the root).
 * @param pages - Accumulator that discovered pages are appended to.
 */
function scanDirectory(dir: string, relativePath: string, pages: PageInfo[]): void {
  for (const item of fs.readdirSync(dir, { withFileTypes: true })) {
    const fullPath = path.join(dir, item.name);
    const itemRelativePath = relativePath ? path.join(relativePath, item.name) : item.name;

    if (item.isDirectory()) {
      if (!SKIPPED_DIRECTORIES.has(item.name)) {
        scanDirectory(fullPath, itemRelativePath, pages);
      }
    } else if (item.isFile() && isPageFile(item.name)) {
      processPageFile(fullPath, itemRelativePath, pages);
    }
  }
}

/**
 * Check if file is a page component
 *
 * @param filename - Bare file name (no directory part).
 */
function isPageFile(filename: string): boolean {
  return PAGE_FILE_PATTERN.test(filename);
}

/**
 * Process a page file: derive its metadata and append it to `pages`.
 *
 * @param fullPath - Path used to read the file and its modification time.
 * @param relativePath - Path relative to the scan root; the route is derived from it.
 * @param pages - Accumulator the new entry is pushed onto.
 */
function processPageFile(fullPath: string, relativePath: string, pages: PageInfo[]): void {
  const route = convertPathToRoute(relativePath);

  pages.push({
    filePath: fullPath,
    route,
    url: `/${route}`,
    // Fall back to the route when no title can be found in the source.
    title: extractPageTitle(fullPath) ?? route,
    priority: calculatePagePriority(route),
    changeFrequency: calculateChangeFrequency(route),
    lastModified: getLastModifiedTime(fullPath),
    isDynamic: isDynamicRoute(relativePath),
    dynamicParams: extractDynamicParams(relativePath),
    contentType: getContentType(relativePath),
  });
}

/**
 * Convert file path to Next.js route
 *
 * The result has no leading or trailing slash, so `about-us/page.tsx` becomes
 * `about-us` and a root-level `page.tsx` becomes `''`. Route-group folders
 * such as `(marketing)` are dropped and `[param]` segments become `:param`.
 *
 * @param filePath - Page file path relative to the scan root; `/` or `\` separated.
 */
function convertPathToRoute(filePath: string): string {
  const segments = filePath
    .replace(/\\/g, '/')
    .replace(/page\.(tsx|ts|jsx|js)$/, '')
    .split('/')
    // Dropping empty segments is what removes the trailing slash left by the file name.
    .filter(segment => segment !== '' && segment !== '.' && !ROUTE_GROUP_PATTERN.test(segment));

  return segments.join('/').replace(/\[(\w+)\]/g, ':$1');
}

/**
 * Find a title in page source text using {@link TITLE_PATTERNS}.
 *
 * @returns The first captured title, or `null` when no pattern matches.
 */
function extractTitleFromSource(content: string): string | null {
  for (const pattern of TITLE_PATTERNS) {
    const title = pattern.exec(content)?.[1];
    if (title !== undefined) {
      return title;
    }
  }
  return null;
}

/**
 * Extract page title from file content
 *
 * Best effort: an unreadable file yields `null` rather than an error, so one
 * bad file does not abort discovery.
 */
function extractPageTitle(filePath: string): string | null {
  try {
    return extractTitleFromSource(fs.readFileSync(filePath, 'utf8'));
  } catch {
    return null;
  }
}

/**
 * Whether `route` is `base` itself or lies below it (`base/...`).
 */
function isRouteWithin(route: string, base: string): boolean {
  return route === base || route.startsWith(`${base}/`);
}

/**
 * Calculate page priority based on route
 *
 * Routes listed in {@link ROUTE_PRIORITIES} get their listed priority, child
 * routes of a listed section get 0.1 less (never below 0.1), and everything
 * else gets 0.5.
 */
function calculatePagePriority(route: string): number {
  const exact = ROUTE_PRIORITIES.get(route);
  if (exact !== undefined) {
    return exact;
  }

  for (const [section, priority] of ROUTE_PRIORITIES) {
    // The home route '' is a prefix of every route, so it must not act as a section.
    if (section !== '' && route.startsWith(`${section}/`)) {
      // Round to one decimal to avoid float noise such as 0.7000000000000001.
      return Math.max(0.1, Math.round((priority - 0.1) * 10) / 10);
    }
  }

  return 0.5;
}

/**
 * Calculate change frequency based on route type
 */
function calculateChangeFrequency(route: string): ChangeFrequency {
  if (route === '' || route === 'about-us' || route === 'contact-us') {
    return 'monthly';
  }
  if (isRouteWithin(route, 'services') || isRouteWithin(route, 'vending-machines')) {
    return 'weekly';
  }
  if (isRouteWithin(route, 'manuals') || isRouteWithin(route, 'blog')) {
    return 'daily';
  }
  return 'monthly';
}

/**
 * Check if route is dynamic, i.e. the file path contains both `[` and `]`.
 */
function isDynamicRoute(filePath: string): boolean {
  return filePath.includes('[') && filePath.includes(']');
}

/**
 * Extract dynamic parameters from route
 *
 * @returns The names inside each `[name]` segment, in order of appearance.
 */
function extractDynamicParams(filePath: string): string[] {
  return Array.from(filePath.matchAll(/\[(\w+)\]/g), match => match[1] ?? '');
}

/**
 * Get last modified time of file
 *
 * @throws Whatever `fs.statSync` throws when the file cannot be stat'ed.
 */
function getLastModifiedTime(filePath: string): Date {
  return fs.statSync(filePath).mtime;
}

/**
 * Determine content type based on path
 *
 * Looks at whole path segments so that names merely containing "api" or
 * "layout" (e.g. `rapid-delivery`) are not misclassified.
 */
function getContentType(filePath: string): 'page' | 'layout' | 'api' {
  const segments = filePath.replace(/\\/g, '/').split('/');
  const fileName = segments.pop() ?? '';

  if (segments.includes('api')) {
    return 'api';
  }
  if (fileName.startsWith('layout.')) {
    return 'layout';
  }
  return 'page';
}

/**
 * Escape a string for literal use inside a regular expression.
 */
function escapeRegExp(text: string): string {
  return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Build a matcher for a route pattern in which every `:param` accepts exactly
 * one path segment.
 */
function buildRouteMatcher(route: string): RegExp {
  const source = route
    .split(/(:\w+)/) // the capturing group keeps the `:param` tokens in the result
    .map(part => (/^:\w+$/.test(part) ? '[^/]+' : escapeRegExp(part)))
    .join('');
  return new RegExp(`^${source}$`);
}

/**
 * Create a lookup that maps a link target to the route of the page it lands on.
 *
 * Exact route matches win; otherwise the first route containing `:param`
 * placeholders whose pattern matches is used.
 *
 * @returns A function yielding the matched page route, or `undefined` when no page matches.
 */
function createRouteResolver(pages: readonly PageInfo[]): (target: string) => string | undefined {
  const exactRoutes = new Set(pages.map(page => page.route));
  const dynamicRoutes = pages
    .filter(page => /:\w+/.test(page.route))
    .map(page => ({ route: page.route, matcher: buildRouteMatcher(page.route) }));

  return target =>
    exactRoutes.has(target)
      ? target
      : dynamicRoutes.find(candidate => candidate.matcher.test(target))?.route;
}

/**
 * Analyze internal links in pages
 *
 * @param pages - Known pages; link targets are resolved against their routes.
 * @param contentByRoute - Page content (markup or source text) keyed by route.
 * @returns Link statistics. Broken links are formatted as `"<route> -> <href>"`.
 *   `averageLinkDensity` is averaged over the routes that have at least one
 *   resolved internal link. The home route `''` is never reported as orphaned.
 */
export function analyzeInternalLinks(
  pages: PageInfo[],
  contentByRoute: Record<string, string>,
): SEOAnalysis {
  const resolveRoute = createRouteResolver(pages);
  const internalLinks: { from: string; to: string; text: string }[] = [];
  const brokenLinks: string[] = [];
  const linkCounts = new Map<string, number>();

  // Find pages that link to other pages
  for (const [route, content] of Object.entries(contentByRoute)) {
    for (const link of extractLinksFromContent(content)) {
      // `#fragment` / `?query` hrefs stay on the current page; they are not links to home.
      if (!isInternalLink(link.href) || SAME_PAGE_HREF.test(link.href)) {
        continue;
      }

      const matchedRoute = resolveRoute(convertLinkToRoute(link.href));
      if (matchedRoute === undefined) {
        brokenLinks.push(`${route} -> ${link.href}`);
        continue;
      }

      internalLinks.push({ from: route, to: matchedRoute, text: link.text });
      linkCounts.set(route, (linkCounts.get(route) ?? 0) + 1);
    }
  }

  // Find orphaned pages (pages with no inbound links)
  const linkedRoutes = new Set(internalLinks.map(link => link.to));
  const orphanedPages = pages
    .map(page => page.route)
    .filter(route => route !== '' && !linkedRoutes.has(route));

  // Analyze pages with issues
  const pagesWithIssues = Object.entries(analyzePageIssues(pages, contentByRoute))
    .filter(([, issues]) => issues.length > 0)
    .map(([url, issues]) => ({ url, issues }));

  // Calculate average link density
  const averageLinkDensity = linkCounts.size > 0 ? internalLinks.length / linkCounts.size : 0;

  // Analyze page types
  const pageTypes: Record<string, number> = {};
  for (const page of pages) {
    pageTypes[page.contentType] = (pageTypes[page.contentType] ?? 0) + 1;
  }

  return {
    totalLinks: internalLinks.length,
    totalPages: pages.length,
    internalLinks: internalLinks.length,
    orphanedPages,
    brokenLinks,
    pagesWithIssues,
    averageLinkDensity,
    pageTypes,
  };
}

/**
 * Extract links from content
 *
 * Recognizes `<a>` and `<Link>` elements with a quoted `href` and plain-text
 * children; the link text is trimmed.
 */
function extractLinksFromContent(content: string): ExtractedLink[] {
  return Array.from(content.matchAll(LINK_PATTERN), match => ({
    href: match[1] ?? match[2] ?? '',
    text: (match[3] ?? '').trim(),
  }));
}

/**
 * Check if link is internal, i.e. has neither a URI scheme nor a
 * protocol-relative `//host` prefix.
 */
function isInternalLink(href: string): boolean {
  return !EXTERNAL_HREF.test(href);
}

/**
 * Convert link to route
 *
 * Drops the query string and fragment, strips leading and trailing slashes so
 * the result is comparable with {@link PageInfo.route}, and turns `:param`
 * placeholders into `*`.
 */
function convertLinkToRoute(href: string): string {
  const [pathOnly = ''] = href.split(/[?#]/);
  return pathOnly
    .replace(/^\/+/, '')
    .replace(/\/+$/, '')
    .replace(/:\w+/g, '*');
}

/**
 * Analyze individual page issues
 *
 * @param _pages - Currently unused; kept so the call signature stays stable.
 * @param contentByRoute - Page content keyed by route.
 * @returns Issues per route; routes without issues are omitted.
 */
function analyzePageIssues(
  _pages: PageInfo[],
  contentByRoute: Record<string, string>,
): Record<string, string[]> {
  const issues: Record<string, string[]> = {};

  for (const [route, content] of Object.entries(contentByRoute)) {
    const pageIssues: string[] = [];
    // Content containing `Head>` is taken to declare both title and description.
    const hasHead = /Head>/i.test(content);

    if (!hasHead && !/title:/i.test(content)) {
      pageIssues.push('Missing title');
    }
    if (!hasHead && !/description:/i.test(content)) {
      pageIssues.push('Missing description');
    }

    // Only internal links count towards the limit, as the message states.
    const linkCount = extractLinksFromContent(content).filter(link =>
      isInternalLink(link.href),
    ).length;
    if (linkCount > MAX_INTERNAL_LINKS) {
      pageIssues.push(`Too many internal links (${linkCount})`);
    }

    if (pageIssues.length > 0) {
      issues[route] = pageIssues;
    }
  }

  return issues;
}

/**
 * Escape the five characters that are special in XML text and attributes.
 */
function escapeXml(text: string): string {
  return text
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&apos;');
}

/**
 * Generate sitemap XML from pages
 *
 * Dynamic pages are skipped because their URL still contains `:param`
 * placeholders rather than a concrete address.
 *
 * @param pages - Pages to list.
 * @param baseUrl - Optional site origin (e.g. `https://example.com`) prepended
 *   to each page URL; trailing slashes are ignored. Defaults to `''`, which
 *   emits the root-relative URLs as they are.
 * @returns The sitemap document, without a trailing newline.
 */
export function generateSitemapXml(pages: PageInfo[], baseUrl = ''): string {
  const origin = baseUrl.replace(/\/+$/, '');
  // Used only for pages whose lastModified is not a valid date.
  const fallbackDate = new Date().toISOString();

  const lines = [
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
  ];

  for (const page of pages) {
    if (page.isDynamic) {
      continue;
    }

    const hasValidDate =
      page.lastModified instanceof Date && !Number.isNaN(page.lastModified.getTime());
    const lastModified = hasValidDate ? page.lastModified.toISOString() : fallbackDate;

    lines.push(
      '  <url>',
      `    <loc>${escapeXml(origin + page.url)}</loc>`,
      `    <lastmod>${lastModified}</lastmod>`,
      `    <changefreq>${page.changeFrequency}</changefreq>`,
      `    <priority>${page.priority.toFixed(1)}</priority>`,
      '  </url>',
    );
  }

  lines.push('</urlset>');
  return lines.join('\n');
}