import fs from "fs"
import path from "path"

/**
 * Page information structure
 */
export interface PageInfo {
  filePath: string
  route: string
  url: string
  title: string
  priority: number
  changeFrequency:
    | "always"
    | "hourly"
    | "daily"
    | "weekly"
    | "monthly"
    | "yearly"
    | "never"
  lastModified: Date
  isDynamic: boolean
  dynamicParams?: string[]
  contentType: "page" | "layout" | "api"
}

/**
 * Dynamic route information
 */
export interface DynamicRouteInfo {
  pattern: string
  paramName: string
  example: string
}

/**
 * SEO analysis results
 */
export interface SEOAnalysis {
  totalLinks: number
  totalPages: number
  internalLinks: number
  orphanedPages: string[]
  brokenLinks: string[]
  pagesWithIssues: {
    url: string
    issues: string[]
  }[]
  averageLinkDensity: number
  pageTypes: { [key: string]: number }
}

/** Shorthand for the change-frequency union declared on `PageInfo`. */
type ChangeFrequency = PageInfo["changeFrequency"]

/** Directory names that are never descended into while scanning for pages. */
const SKIPPED_DIRECTORIES: ReadonlySet<string> = new Set([
  "api",
  "lib",
  "components",
  "hooks",
])

/** File names recognised as page components. */
const PAGE_FILE_PATTERN = /^page\.(tsx|ts|jsx|js)$/

/** Sitemap priority of well-known top-level routes ("" is the home page). */
const ROUTE_PRIORITIES: Readonly<Record<string, number>> = {
  "": 1.0,
  "about-us": 0.9,
  "contact-us": 0.9,
  services: 0.9,
  "vending-machines": 0.9,
  manuals: 0.8,
}

/**
 * Patterns tried in order to find a page title; capture group 1 is the title.
 * The first two are the original heuristics, the last one covers the static
 * `export const metadata = { title: "..." }` form.
 */
const TITLE_PATTERNS: readonly RegExp[] = [
  /export\s+default\s+function\s+\w*\s*\([^)]*\)\s*{[^}]*title:\s*['"`]([^'"`]+)['"`]/,
  /export\s+async\s+function\s+generateMetadata\([^)]*\)\s*{[^}]*title:\s*['"`]([^'"`]+)['"`]/,
  /export\s+const\s+metadata\b[^=]*=\s*{[^}]*title:\s*['"`]([^'"`]+)['"`]/,
]

/**
 * Discover all React pages in the app directory.
 *
 * @param appDir - Directory to scan recursively.
 * @returns One entry per page file found, sorted by route.
 * @throws Error when `appDir` does not exist.
 */
export function discoverPages(appDir: string): PageInfo[] {
  const pages: PageInfo[] = []

  if (!fs.existsSync(appDir)) {
    throw new Error(`App directory not found: ${appDir}`)
  }

  scanDirectory(appDir, "", pages)

  return pages.sort((a, b) => a.route.localeCompare(b.route))
}

/**
 * Scan a directory recursively for page files, appending results to `pages`.
 *
 * @param dir - Absolute or working-directory-relative path being scanned.
 * @param relativePath - Path of `dir` relative to the scan root ("" at the root).
 * @param pages - Accumulator that receives every discovered page.
 */
function scanDirectory(
  dir: string,
  relativePath: string,
  pages: PageInfo[]
): void {
  const items = fs.readdirSync(dir, { withFileTypes: true })

  for (const item of items) {
    const fullPath = path.join(dir, item.name)
    const itemRelativePath = relativePath
      ? path.join(relativePath, item.name)
      : item.name

    if (item.isDirectory()) {
      if (SKIPPED_DIRECTORIES.has(item.name)) {
        continue
      }
      scanDirectory(fullPath, itemRelativePath, pages)
    } else if (item.isFile() && isPageFile(item.name)) {
      processPageFile(fullPath, itemRelativePath, pages)
    }
  }
}

/**
 * Check whether a file name is a page component
 * (`page.tsx`, `page.ts`, `page.jsx` or `page.js`).
 */
function isPageFile(filename: string): boolean {
  return PAGE_FILE_PATTERN.test(filename)
}

/**
 * Build the `PageInfo` record for one page file and append it to `pages`.
 *
 * @param fullPath - Path used to read the file and its modification time.
 * @param relativePath - Path relative to the scan root; determines the route.
 * @param pages - Accumulator that receives the new record.
 */
function processPageFile(
  fullPath: string,
  relativePath: string,
  pages: PageInfo[]
): void {
  const route = convertPathToRoute(relativePath)
  // The home route is the empty string, which makes a useless title fallback.
  const title = extractPageTitle(fullPath) ?? (route === "" ? "Home" : route)

  pages.push({
    filePath: fullPath,
    route,
    url: `/${route}`,
    title,
    priority: calculatePagePriority(route),
    changeFrequency: calculateChangeFrequency(route),
    lastModified: getLastModifiedTime(fullPath),
    isDynamic: isDynamicRoute(relativePath),
    dynamicParams: extractDynamicParams(relativePath),
    contentType: getContentType(relativePath),
  })
}

/**
 * Convert a page file path (relative to the app directory) to a route.
 *
 * The result has no leading or trailing slash, so the home page is "".
 * Route-group folders such as `(marketing)` are dropped because they are not
 * part of the URL, and `[param]` segments become `:param`.
 *
 * @example convertPathToRoute("products/[id]/page.tsx") // "products/:id"
 */
function convertPathToRoute(filePath: string): string {
  return filePath
    .replace(/\\/g, "/") // Windows separators
    .replace(/(^|\/)page\.(tsx|ts|jsx|js)$/, "")
    .split("/")
    .filter(
      (segment) =>
        segment !== "" && segment !== "." && !/^\(.+\)$/.test(segment)
    )
    .map((segment) => segment.replace(/\[(\w+)\]/g, ":$1"))
    .join("/")
}

/**
 * Extract a page title from the file content using `TITLE_PATTERNS`.
 *
 * This is a best-effort lookup: an unreadable file or a file without a
 * recognisable title yields `null` rather than an error.
 */
function extractPageTitle(filePath: string): string | null {
  let content: string
  try {
    content = fs.readFileSync(filePath, "utf8")
  } catch {
    return null
  }

  for (const pattern of TITLE_PATTERNS) {
    const title = pattern.exec(content)?.[1]
    if (title !== undefined) {
      return title
    }
  }
  return null
}

/**
 * Tell whether `route` is nested below `base` (on a segment boundary).
 * The empty home route is never treated as a parent.
 */
function isChildRoute(route: string, base: string): boolean {
  return base !== "" && route.startsWith(`${base}/`)
}

/**
 * Calculate the sitemap priority for a route.
 *
 * Routes listed in `ROUTE_PRIORITIES` get their configured value, pages
 * nested below one of them get that value minus 0.1 (never below 0.1), and
 * everything else gets 0.5.
 */
function calculatePagePriority(route: string): number {
  // Own-property check so routes like "constructor" do not hit Object.prototype.
  if (Object.prototype.hasOwnProperty.call(ROUTE_PRIORITIES, route)) {
    return ROUTE_PRIORITIES[route] ?? 0.5
  }

  for (const [base, priority] of Object.entries(ROUTE_PRIORITIES)) {
    if (isChildRoute(route, base)) {
      // Round to one decimal to avoid results such as 0.7000000000000001.
      return Math.max(0.1, Math.round((priority - 0.1) * 10) / 10)
    }
  }

  return 0.5
}

/**
 * Calculate the change frequency for a route: services and vending-machine
 * sections are "weekly", manuals and blog sections are "daily", everything
 * else (including the home, about and contact pages) is "monthly".
 */
function calculateChangeFrequency(route: string): ChangeFrequency {
  const inSection = (base: string): boolean =>
    route === base || isChildRoute(route, base)

  if (inSection("services") || inSection("vending-machines")) {
    return "weekly"
  }
  if (inSection("manuals") || inSection("blog")) {
    return "daily"
  }
  return "monthly"
}

/**
 * Check whether a file path contains a bracketed (dynamic) segment.
 */
function isDynamicRoute(filePath: string): boolean {
  return filePath.includes("[") && filePath.includes("]")
}

/**
 * Extract the names of `[param]` segments from a file path.
 *
 * @example extractDynamicParams("shop/[category]/[id]/page.tsx") // ["category", "id"]
 */
function extractDynamicParams(filePath: string): string[] {
  const matches = filePath.match(/\[(\w+)\]/g)
  return matches ? matches.map((match) => match.slice(1, -1)) : []
}

/**
 * Get the last modification time of a file.
 */
function getLastModifiedTime(filePath: string): Date {
  return fs.statSync(filePath).mtime
}

/**
 * Determine the content type from a file path.
 *
 * Whole path segments are compared, so a folder such as `rapid-delivery` is
 * not mistaken for an `api` route.
 */
function getContentType(filePath: string): "page" | "layout" | "api" {
  const segments = filePath.split(/[\\/]/)
  const fileName = segments.pop() ?? ""

  if (segments.includes("api")) {
    return "api"
  }
  if (/^layout\./.test(fileName)) {
    return "layout"
  }
  return "page"
}

/**
 * Find the page route that a link target refers to.
 *
 * An exact match wins; otherwise a route matches when it has the same number
 * of segments and every non-`:param` segment is identical.
 *
 * @returns The matching page route, or `undefined` when no page matches.
 */
function findPageRoute(
  target: string,
  routes: readonly string[]
): string | undefined {
  if (routes.includes(target)) {
    return target
  }

  const targetSegments = target.split("/")
  return routes.find((route) => {
    const routeSegments = route.split("/")
    return (
      routeSegments.length === targetSegments.length &&
      routeSegments.every((segment, index) =>
        segment.startsWith(":")
          ? targetSegments[index] !== ""
          : segment === targetSegments[index]
      )
    )
  })
}

/**
 * Analyze internal links between pages.
 *
 * @param pages - Known pages; their `route` values are the valid link targets.
 * @param contentByRoute - Page source/markup keyed by the route it belongs to.
 * @returns Link statistics:
 *   - `totalLinks`: every anchor found, internal or not
 *   - `internalLinks`: internal links that resolve to a known page
 *   - `brokenLinks`: internal links with no matching page, as `"from -> href"`
 *   - `orphanedPages`: non-home pages without inbound links
 *   - `averageLinkDensity`: resolved internal links per page that has any
 */
export function analyzeInternalLinks(
  pages: PageInfo[],
  contentByRoute: Record<string, string>
): SEOAnalysis {
  const knownRoutes = pages.map((page) => page.route)
  const internalLinks: { from: string; to: string; text: string }[] = []
  const brokenLinks: string[] = []
  let totalLinks = 0

  for (const [route, content] of Object.entries(contentByRoute)) {
    for (const link of extractLinksFromContent(content)) {
      totalLinks += 1
      if (!isInternalLink(link.href)) {
        continue
      }

      const target = findPageRoute(convertLinkToRoute(link.href), knownRoutes)
      if (target === undefined) {
        brokenLinks.push(`${route} -> ${link.href}`)
      } else {
        // Record the page's own route so dynamic pages count as linked.
        internalLinks.push({ from: route, to: target, text: link.text })
      }
    }
  }

  // Orphaned pages: everything except the home page with no inbound link.
  const linkedRoutes = new Set(internalLinks.map((link) => link.to))
  const orphanedPages = knownRoutes.filter(
    (route) => route !== "" && !linkedRoutes.has(route)
  )

  const pagesWithIssues = Object.entries(
    analyzePageIssues(pages, contentByRoute)
  )
    .filter(([, issues]) => issues.length > 0)
    .map(([url, issues]) => ({ url, issues }))

  const linkingPages = new Set(internalLinks.map((link) => link.from))
  const averageLinkDensity =
    linkingPages.size > 0 ? internalLinks.length / linkingPages.size : 0

  const pageTypes: Record<string, number> = {}
  for (const page of pages) {
    pageTypes[page.contentType] = (pageTypes[page.contentType] ?? 0) + 1
  }

  return {
    totalLinks,
    totalPages: pages.length,
    internalLinks: internalLinks.length,
    orphanedPages,
    brokenLinks,
    pagesWithIssues,
    averageLinkDensity,
    pageTypes,
  }
}

/**
 * Extract `<a ... href="...">text</a>` links from content.
 *
 * Only double-quoted `href` values and anchors whose body is plain text
 * (no nested tags) are recognised.
 */
function extractLinksFromContent(
  content: string
): { href: string; text: string }[] {
  const links: { href: string; text: string }[] = []
  const linkRegex = /<a\s(?:[^>]*?\s)?href="([^"]+)"[^>]*>([^<]+)<\/a>/g

  let match: RegExpExecArray | null
  while ((match = linkRegex.exec(content)) !== null) {
    const href = match[1]
    const text = match[2]
    if (href === undefined || text === undefined) {
      continue
    }
    links.push({ href, text: text.trim() })
  }

  return links
}

/**
 * Check whether a link points at another page of this site.
 *
 * Links with a URI scheme (`https:`, `mailto:`, `tel:`, ...), protocol-relative
 * links (`//host`) and same-page links (`#fragment`, `?query`) are not internal.
 */
function isInternalLink(href: string): boolean {
  const target = href.trim()
  if (target === "" || target.startsWith("#") || target.startsWith("?")) {
    return false
  }
  if (target.startsWith("//")) {
    return false
  }
  return !/^[a-z][a-z0-9+.-]*:/i.test(target)
}

/**
 * Convert a link href to the route format used by `PageInfo.route`:
 * query string and fragment removed, no leading or trailing slash.
 * Literal `:param` placeholders become `*`.
 */
function convertLinkToRoute(href: string): string {
  const [pathname = ""] = href.trim().split(/[?#]/)
  return pathname.replace(/^\/+|\/+$/g, "").replace(/:\w+/g, "*")
}

/**
 * Analyze individual page issues.
 *
 * @param _pages - Unused; kept so existing call sites stay valid.
 * @param contentByRoute - Page source/markup keyed by route.
 * @returns Issues per route; routes without issues are omitted.
 */
function analyzePageIssues(
  _pages: PageInfo[],
  contentByRoute: Record<string, string>
): Record<string, string[]> {
  const issues: Record<string, string[]> = {}

  for (const [route, content] of Object.entries(contentByRoute)) {
    const pageIssues: string[] = []
    // A Head element suppresses both the title and the description check.
    const hasHead = /Head>/i.test(content)

    if (!hasHead && !/title:/i.test(content)) {
      pageIssues.push("Missing title")
    }
    if (!hasHead && !/description:/i.test(content)) {
      pageIssues.push("Missing description")
    }

    // Count internal links only, as the issue message states.
    const linkCount = extractLinksFromContent(content).filter((link) =>
      isInternalLink(link.href)
    ).length
    if (linkCount > 10) {
      pageIssues.push(`Too many internal links (${linkCount})`)
    }

    if (pageIssues.length > 0) {
      issues[route] = pageIssues
    }
  }

  return issues
}

/**
 * Escape the five characters that are special in XML text and attributes.
 */
function escapeXml(value: string): string {
  return value
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&apos;")
}

/**
 * Generate sitemap XML from pages.
 *
 * @param pages - Pages to list; each contributes one `<url>` element.
 * @param baseUrl - Optional origin (e.g. "https://example.com") prepended to
 *   every `page.url`; trailing slashes are ignored. Defaults to "".
 * @returns The sitemap document, without a trailing newline.
 */
export function generateSitemapXml(pages: PageInfo[], baseUrl = ""): string {
  const origin = baseUrl.replace(/\/+$/, "")
  // Used only when a page carries no usable modification date.
  const fallbackTimestamp = new Date().toISOString()

  const lines: string[] = [
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
  ]

  for (const page of pages) {
    const hasValidDate =
      page.lastModified instanceof Date &&
      !Number.isNaN(page.lastModified.getTime())
    const lastModified = hasValidDate
      ? page.lastModified.toISOString()
      : fallbackTimestamp

    lines.push(
      "  <url>",
      `    <loc>${escapeXml(origin + page.url)}</loc>`,
      `    <lastmod>${lastModified}</lastmod>`,
      `    <changefreq>${page.changeFrequency}</changefreq>`,
      `    <priority>${page.priority.toFixed(1)}</priority>`,
      "  </url>"
    )
  }

  lines.push("</urlset>")
  return lines.join("\n")
}