import fs from "fs"
import path from "path"

/** File names that are treated as page components. */
const PAGE_FILE_PATTERN = /^page\.(tsx|ts|jsx|js)$/

/** Directory names that are never descended into while scanning. */
const SKIPPED_DIRECTORIES = new Set(["api", "lib", "components", "hooks"])

/** Sitemap priority for well-known routes, keyed by exact route. */
const ROUTE_PRIORITIES = new Map([
  ["", 1.0], // Home page
  ["about-us", 0.9],
  ["contact-us", 0.9],
  ["services", 0.9],
  ["vending-machines", 0.9],
  ["manuals", 0.8],
])

/** Priority used when a route matches no entry in ROUTE_PRIORITIES. */
const DEFAULT_PRIORITY = 0.5

/** A page with more internal links than this is flagged as an issue. */
const MAX_INTERNAL_LINKS = 10

/**
 * Patterns tried in order by extractPageTitle; capture group 1 is the title.
 * The first two are the original heuristics (the generateMetadata one now
 * also accepts parameters and a return-type annotation); the third covers
 * a static `export const metadata = { title: ... }` object.
 */
const TITLE_PATTERNS = [
  /export\s+default\s+function\s+\w*\s*\([^)]*\)\s*{[^}]*title:\s*['"`]([^'"`]+)['"`]/,
  /export\s+(?:async\s+)?function\s+generateMetadata\s*\([^)]*\)[^{]*{[^}]*title:\s*['"`]([^'"`]+)['"`]/,
  /export\s+const\s+metadata\b[^=]*=\s*{[^}]*title:\s*['"`]([^'"`]+)['"`]/,
]

/**
 * Page information structure
 *
 * Plain record describing one discovered page file. All constructor
 * arguments are stored unchanged on the instance under the same names.
 */
export class PageInfo {
  constructor(
    filePath,
    route,
    url,
    title,
    priority,
    changeFrequency,
    lastModified,
    isDynamic,
    dynamicParams,
    contentType
  ) {
    this.filePath = filePath
    this.route = route
    this.url = url
    this.title = title
    this.priority = priority
    this.changeFrequency = changeFrequency
    this.lastModified = lastModified
    this.isDynamic = isDynamic
    this.dynamicParams = dynamicParams
    this.contentType = contentType
  }
}

/**
 * SEO analysis results
 *
 * Plain record returned by analyzeInternalLinks. All constructor
 * arguments are stored unchanged on the instance under the same names.
 */
export class SEOAnalysis {
  constructor(
    totalPages,
    internalLinks,
    orphanedPages,
    brokenLinks,
    pagesWithIssues,
    averageLinkDensity,
    pageTypes
  ) {
    this.totalPages = totalPages
    this.internalLinks = internalLinks
    this.orphanedPages = orphanedPages
    this.brokenLinks = brokenLinks
    this.pagesWithIssues = pagesWithIssues
    this.averageLinkDensity = averageLinkDensity
    this.pageTypes = pageTypes
  }
}

/**
 * Discover all React pages in the app directory
 *
 * @param {string} appDir - Directory to scan recursively.
 * @returns {PageInfo[]} Discovered pages, sorted by route.
 * @throws {Error} If appDir does not exist.
 */
export function discoverPages(appDir) {
  if (!fs.existsSync(appDir)) {
    throw new Error(`App directory not found: ${appDir}`)
  }

  const pages = []
  scanDirectory(appDir, "", pages)
  return pages.sort((a, b) => a.route.localeCompare(b.route))
}

/**
 * Scan directory recursively for page files
 *
 * @param {string} dir - Directory to read.
 * @param {string} relativePath - Path of dir relative to the scan root ("" at the root).
 * @param {PageInfo[]} pages - Accumulator; discovered pages are pushed onto it.
 */
function scanDirectory(dir, relativePath, pages) {
  const entries = fs.readdirSync(dir, { withFileTypes: true })

  for (const entry of entries) {
    const fullPath = path.join(dir, entry.name)
    const entryRelativePath = relativePath
      ? path.join(relativePath, entry.name)
      : entry.name

    if (entry.isDirectory()) {
      if (!SKIPPED_DIRECTORIES.has(entry.name)) {
        scanDirectory(fullPath, entryRelativePath, pages)
      }
    } else if (entry.isFile() && isPageFile(entry.name)) {
      processPageFile(fullPath, entryRelativePath, pages)
    }
  }
}

/**
 * Check if file is a page component
 *
 * @param {string} filename - Bare file name (no directory part).
 * @returns {boolean} True for page.tsx, page.ts, page.jsx or page.js.
 */
function isPageFile(filename) {
  return PAGE_FILE_PATTERN.test(filename)
}

/**
 * Process a page file
 *
 * Derives every PageInfo field from the file's paths and content and
 * appends the resulting record to pages.
 *
 * @param {string} fullPath - Path used to read the file and its mtime.
 * @param {string} relativePath - Path relative to the scan root; drives the route.
 * @param {PageInfo[]} pages - Accumulator to push the new record onto.
 */
function processPageFile(fullPath, relativePath, pages) {
  const route = convertPathToRoute(relativePath)

  pages.push(
    new PageInfo(
      fullPath,
      route,
      `/${route}`,
      extractPageTitle(fullPath) || route, // fall back to the route when no title is found
      calculatePagePriority(route),
      calculateChangeFrequency(route),
      getLastModifiedTime(fullPath),
      isDynamicRoute(relativePath),
      extractDynamicParams(relativePath),
      getContentType(relativePath)
    )
  )
}

/**
 * Convert file path to Next.js route
 *
 * The result has no leading or trailing slash, so it can be compared
 * directly with the keys of ROUTE_PRIORITIES ("about-us/page.tsx" becomes
 * "about-us", not "about-us/"). `[param]` segments become `:param`.
 *
 * @param {string} filePath - Page file path relative to the scan root.
 * @returns {string} Route string; "" for the root page.
 */
function convertPathToRoute(filePath) {
  const route = filePath
    .replace(/\\/g, "/") // normalise Windows separators
    .replace(/(^|\/)page\.(tsx|ts|jsx|js)$/, "") // drop the page file itself
    .replace(/\/+$/, "")

  if (route === "" || route === ".") {
    return ""
  }

  return route.replace(/\[(\w+)\]/g, ":$1")
}

/**
 * Extract page title from file content
 *
 * Best-effort: tries each TITLE_PATTERNS entry in order against the file
 * text and returns the first captured title.
 *
 * @param {string} filePath - File to read as UTF-8.
 * @returns {string|null} The title, or null when none is found or the file cannot be read.
 */
function extractPageTitle(filePath) {
  let content
  try {
    content = fs.readFileSync(filePath, "utf8")
  } catch (error) {
    // Deliberate best-effort: an unreadable file simply has no title.
    return null
  }

  for (const pattern of TITLE_PATTERNS) {
    const match = content.match(pattern)
    if (match) {
      return match[1]
    }
  }

  return null
}

/**
 * Calculate page priority based on route
 *
 * Exact matches use ROUTE_PRIORITIES. A child of a listed route
 * ("services/repair") gets the parent's priority minus 0.1, floored at 0.1.
 * Anything else gets DEFAULT_PRIORITY.
 *
 * @param {string} route - Route without leading/trailing slash.
 * @returns {number} Priority between 0.1 and 1.0.
 */
function calculatePagePriority(route) {
  const exact = ROUTE_PRIORITIES.get(route)
  if (exact !== undefined) {
    return exact
  }

  for (const [parent, priority] of ROUTE_PRIORITIES) {
    // The home route "" is a prefix of every string, so it must be skipped;
    // the "/" keeps "services-old" from counting as a child of "services".
    if (parent !== "" && route.startsWith(`${parent}/`)) {
      // Round to one decimal to avoid float noise such as 0.7000000000000001.
      return Math.max(0.1, Math.round((priority - 0.1) * 10) / 10)
    }
  }

  return DEFAULT_PRIORITY
}

/**
 * Calculate change frequency based on route type
 *
 * @param {string} route - Route without leading/trailing slash.
 * @returns {"daily"|"weekly"|"monthly"} Sitemap change frequency.
 */
function calculateChangeFrequency(route) {
  const startsWithAny = (prefixes) =>
    prefixes.some((prefix) => route.startsWith(prefix))

  if (["", "about-us", "contact-us"].includes(route)) {
    return "monthly"
  }
  if (startsWithAny(["services", "vending-machines"])) {
    return "weekly"
  }
  if (startsWithAny(["manuals", "blog"])) {
    return "daily"
  }
  return "monthly"
}

/**
 * Check if route is dynamic
 *
 * @param {string} filePath - Page file path.
 * @returns {boolean} True when the path contains both "[" and "]".
 */
function isDynamicRoute(filePath) {
  return filePath.includes("[") && filePath.includes("]")
}

/**
 * Extract dynamic parameters from route
 *
 * @param {string} filePath - Page file path.
 * @returns {string[]} Names found inside `[name]` segments, in order; [] if none.
 */
function extractDynamicParams(filePath) {
  return Array.from(filePath.matchAll(/\[(\w+)\]/g), (match) => match[1])
}

/**
 * Get last modified time of file
 *
 * @param {string} filePath - File to stat.
 * @returns {Date} The file's mtime.
 */
function getLastModifiedTime(filePath) {
  return fs.statSync(filePath).mtime
}

/**
 * Determine content type based on path
 *
 * Matches whole path segments rather than substrings, so a directory such
 * as "rapid-delivery" is not mistaken for an API route.
 *
 * @param {string} filePath - File path using "/" or "\\" separators.
 * @returns {"api"|"layout"|"page"} Content type label.
 */
function getContentType(filePath) {
  const segments = filePath.split(/[\\/]/)
  const fileName = segments.pop()

  if (segments.includes("api")) {
    return "api"
  }
  if (/^layout(\.|$)/.test(fileName)) {
    return "layout"
  }
  return "page"
}

/**
 * Build a lookup that maps a link target to the route of a known page.
 *
 * Exact route matches win. Otherwise a target matches a route whose
 * `:param` segments each line up with one non-empty target segment, so
 * "blog/hello" resolves to "blog/:slug".
 *
 * @param {Array<{route: string}>} pages - Known pages.
 * @returns {(target: string) => (string|null)} Resolver returning the matching route or null.
 */
function createRouteResolver(pages) {
  const exactRoutes = new Set(pages.map((page) => page.route))
  const dynamicRoutes = pages
    .filter((page) => page.route.includes(":"))
    .map((page) => ({ route: page.route, segments: page.route.split("/") }))

  return (target) => {
    if (exactRoutes.has(target)) {
      return target
    }

    const targetSegments = target.split("/")
    const hit = dynamicRoutes.find(
      ({ segments }) =>
        segments.length === targetSegments.length &&
        segments.every((segment, index) =>
          segment.startsWith(":")
            ? targetSegments[index] !== ""
            : segment === targetSegments[index]
        )
    )
    return hit ? hit.route : null
  }
}

/**
 * Analyze internal links in pages
 *
 * @param {Array<{route: string, contentType: string}>} pages - Known pages.
 * @param {Object<string, string>} contentByRoute - Page content keyed by source route.
 * @returns {SEOAnalysis} Totals, orphaned routes (no inbound link; the home
 *   route "" is never reported), broken links formatted as "from -> href",
 *   per-page issues, the average number of resolved internal links per page
 *   that has at least one, and a count of pages per content type.
 */
export function analyzeInternalLinks(pages, contentByRoute) {
  const resolveRoute = createRouteResolver(pages)
  const internalLinks = []
  const brokenLinks = []
  const linkedRoutes = new Set()
  const linkCounts = new Map()

  for (const [route, content] of Object.entries(contentByRoute)) {
    for (const link of extractLinksFromContent(content)) {
      if (!isInternalLink(link.href)) {
        continue
      }

      const targetRoute = resolveRoute(convertLinkToRoute(link.href))
      if (targetRoute === null) {
        brokenLinks.push(`${route} -> ${link.href}`)
        continue
      }

      internalLinks.push({ from: route, to: targetRoute, text: link.text })
      linkedRoutes.add(targetRoute)
      linkCounts.set(route, (linkCounts.get(route) || 0) + 1)
    }
  }

  const orphanedPages = pages
    .filter((page) => page.route !== "" && !linkedRoutes.has(page.route))
    .map((page) => page.route)

  const pagesWithIssues = Object.entries(
    analyzePageIssues(pages, contentByRoute)
  )
    .filter(([, issues]) => issues.length > 0)
    .map(([route, issues]) => ({ url: route, issues }))

  const averageLinkDensity =
    linkCounts.size > 0 ? internalLinks.length / linkCounts.size : 0

  const pageTypes = {}
  for (const page of pages) {
    pageTypes[page.contentType] = (pageTypes[page.contentType] || 0) + 1
  }

  return new SEOAnalysis(
    pages.length,
    internalLinks.length,
    orphanedPages,
    brokenLinks,
    pagesWithIssues,
    averageLinkDensity,
    pageTypes
  )
}

/**
 * Extract links from content
 *
 * Finds `<a ... href="...">text</a>` anchors whose href is double-quoted
 * and whose body is plain text (no nested tags).
 *
 * @param {string} content - HTML/JSX text to search.
 * @returns {Array<{href: string, text: string}>} Links in document order; text is trimmed.
 */
function extractLinksFromContent(content) {
  const anchorPattern =
    /<a\s+(?:[^>]*?\s)?href="([^"]+)"[^>]*>([^<]+)<\/a>/g

  return Array.from(content.matchAll(anchorPattern), (match) => ({
    href: match[1],
    text: match[2].trim(),
  }))
}

/**
 * Check if link is internal
 *
 * @param {string} href - Raw href value.
 * @returns {boolean} False for fragment-only links ("#top"), protocol-relative
 *   URLs ("//host") and anything with a URI scheme (http:, mailto:, tel:, ...).
 */
function isInternalLink(href) {
  if (href.startsWith("#") || href.startsWith("//")) {
    return false
  }
  return !/^[a-z][a-z\d+.-]*:/i.test(href)
}

/**
 * Convert link to route
 *
 * Drops the query string and fragment, one leading slash and any trailing
 * slashes, and replaces `:param` placeholders with `*`.
 *
 * @param {string} href - Internal link target.
 * @returns {string} Route string comparable with PageInfo.route.
 */
function convertLinkToRoute(href) {
  const [pathOnly] = href.split(/[?#]/)

  return pathOnly
    .replace(/^\//, "")
    .replace(/\/+$/, "")
    .replace(/:\w+/g, "*")
}

/**
 * Analyze individual page issues
 *
 * @param {Array} pages - Unused; kept so the signature stays stable for callers.
 * @param {Object<string, string>} contentByRoute - Page content keyed by route.
 * @returns {Object<string, string[]>} Issue messages per route; routes without issues are omitted.
 */
function analyzePageIssues(pages, contentByRoute) {
  const issues = {}

  for (const [route, content] of Object.entries(contentByRoute)) {
    const pageIssues = []
    const hasHead = /Head>/i.test(content)

    if (!hasHead && !/title:/i.test(content)) {
      pageIssues.push("Missing title")
    }
    if (!hasHead && !/description:/i.test(content)) {
      pageIssues.push("Missing description")
    }

    // Only internal links count towards the limit named in the message.
    const linkCount = extractLinksFromContent(content).filter((link) =>
      isInternalLink(link.href)
    ).length
    if (linkCount > MAX_INTERNAL_LINKS) {
      pageIssues.push(`Too many internal links (${linkCount})`)
    }

    if (pageIssues.length > 0) {
      issues[route] = pageIssues
    }
  }

  return issues
}

/**
 * Escape the five XML special characters in text content.
 *
 * @param {string} value - Raw text.
 * @returns {string} Text safe to place inside an XML element.
 */
function escapeXml(value) {
  return String(value)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&apos;")
}

/**
 * Generate sitemap XML from pages
 *
 * Every entry uses the generation time as <lastmod>.
 * NOTE(review): dynamic pages are emitted with their ":param" placeholder
 * in <loc>; callers probably want to filter or expand them first.
 *
 * @param {Array<{url: string, changeFrequency: string, priority: number}>} pages - Pages to list.
 * @param {string} [baseUrl=""] - Optional site origin prepended to each page url
 *   (trailing slashes are ignored). The sitemap protocol expects absolute URLs.
 * @returns {string} Sitemap document, without a trailing newline.
 */
export function generateSitemapXml(pages, baseUrl = "") {
  const origin = baseUrl.replace(/\/+$/, "")
  const currentDate = new Date().toISOString()

  const entries = pages.map((page) =>
    [
      "  <url>",
      `    <loc>${escapeXml(`${origin}${page.url}`)}</loc>`,
      `    <lastmod>${currentDate}</lastmod>`,
      `    <changefreq>${page.changeFrequency}</changefreq>`,
      `    <priority>${page.priority.toFixed(1)}</priority>`,
      "  </url>",
    ].join("\n")
  )

  return [
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
    ...entries,
    "</urlset>",
  ].join("\n")
}