453 lines
10 KiB
JavaScript
453 lines
10 KiB
JavaScript
import fs from "fs"
|
|
import path from "path"
|
|
|
|
/**
 * Page information structure.
 *
 * A plain data holder describing one discovered page. The constructor stores
 * every argument on the instance under the same name and performs no
 * validation or normalisation.
 */
export class PageInfo {
  /**
   * @param {string} filePath - Path of the page file on disk.
   * @param {string} route - Route derived from the file's relative path.
   * @param {string} url - URL path of the page (the route prefixed with "/").
   * @param {string} title - Page title, or the route when no title was found.
   * @param {number} priority - Sitemap priority.
   * @param {string} changeFrequency - Sitemap change frequency.
   * @param {Date} lastModified - Modification time of the page file.
   * @param {boolean} isDynamic - Whether the path contains a dynamic segment.
   * @param {string[]} dynamicParams - Names of the dynamic parameters.
   * @param {string} contentType - Content classification of the file.
   */
  constructor(
    filePath,
    route,
    url,
    title,
    priority,
    changeFrequency,
    lastModified,
    isDynamic,
    dynamicParams,
    contentType
  ) {
    this.filePath = filePath
    this.route = route
    this.url = url
    this.title = title
    this.priority = priority
    this.changeFrequency = changeFrequency
    this.lastModified = lastModified
    this.isDynamic = isDynamic
    this.dynamicParams = dynamicParams
    this.contentType = contentType
  }
}
|
|
|
|
/**
 * SEO analysis results.
 *
 * A plain data holder for the outcome of an internal-link analysis. The
 * constructor stores every argument on the instance under the same name and
 * performs no validation.
 */
export class SEOAnalysis {
  /**
   * @param {number} totalPages - Number of pages that were analysed.
   * @param {number} internalLinks - Number of internal links that resolved to a known page.
   * @param {string[]} orphanedPages - Routes that no analysed page links to.
   * @param {string[]} brokenLinks - Internal links with no matching page, as "from -> href".
   * @param {{url: string, issues: string[]}[]} pagesWithIssues - Per-page issue lists.
   * @param {number} averageLinkDensity - Average number of internal links per linking page.
   * @param {Object<string, number>} pageTypes - Page count per content type.
   */
  constructor(
    totalPages,
    internalLinks,
    orphanedPages,
    brokenLinks,
    pagesWithIssues,
    averageLinkDensity,
    pageTypes
  ) {
    this.totalPages = totalPages
    this.internalLinks = internalLinks
    this.orphanedPages = orphanedPages
    this.brokenLinks = brokenLinks
    this.pagesWithIssues = pagesWithIssues
    this.averageLinkDensity = averageLinkDensity
    this.pageTypes = pageTypes
  }
}
|
|
|
|
/**
 * Discover all React pages in the app directory.
 *
 * Walks `appDir` recursively via `scanDirectory` and returns the collected
 * page entries sorted by route.
 *
 * @param {string} appDir - Root directory to scan.
 * @returns {PageInfo[]} Discovered pages ordered by `route` (locale compare).
 * @throws {Error} When `appDir` does not exist or is not a directory.
 */
export function discoverPages(appDir) {
  if (!fs.existsSync(appDir)) {
    throw new Error(`App directory not found: ${appDir}`)
  }

  // Fail with a clear message instead of a raw ENOTDIR from readdirSync.
  if (!fs.statSync(appDir).isDirectory()) {
    throw new Error(`App path is not a directory: ${appDir}`)
  }

  const pages = []
  scanDirectory(appDir, "", pages)

  // `pages` is local to this call, so sorting in place is safe.
  return pages.sort((a, b) => a.route.localeCompare(b.route))
}
|
|
|
|
/**
 * Scan directory recursively for page files.
 *
 * Every file accepted by `isPageFile` is handed to `processPageFile`, which
 * appends an entry to `pages`. Directories named `api`, `lib`, `components`
 * or `hooks` are not descended into, and neither are Next.js private folders
 * (names starting with `_`), which are opted out of routing together with
 * all their subfolders.
 *
 * @param {string} dir - Directory to read.
 * @param {string} relativePath - Path of `dir` relative to the scan root ("" for the root).
 * @param {PageInfo[]} pages - Accumulator; mutated in place.
 * @returns {void}
 * @throws {Error} Propagates any error raised by `fs.readdirSync`.
 */
function scanDirectory(dir, relativePath, pages) {
  const skippedDirectories = new Set(["api", "lib", "components", "hooks"])

  for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
    const fullPath = path.join(dir, entry.name)
    const entryRelativePath = relativePath
      ? path.join(relativePath, entry.name)
      : entry.name

    if (entry.isDirectory()) {
      if (skippedDirectories.has(entry.name) || entry.name.startsWith("_")) {
        continue
      }

      scanDirectory(fullPath, entryRelativePath, pages)
    } else if (entry.isFile() && isPageFile(entry.name)) {
      processPageFile(fullPath, entryRelativePath, pages)
    }
  }
}
|
|
|
|
/**
 * Check if file is a page component.
 *
 * A page file is named exactly `page` with one of the extensions `.tsx`,
 * `.ts`, `.jsx` or `.js`.
 *
 * @param {string} filename - Bare file name (no directory part).
 * @returns {boolean} True when the name is one of the four page file names.
 */
function isPageFile(filename) {
  // One anchored pattern replaces the former extension check plus four
  // equality checks, which tested the same thing twice.
  return /^page\.(tsx|ts|jsx|js)$/.test(filename)
}
|
|
|
|
/**
 * Process a page file.
 *
 * Derives route, URL, title, sitemap hints and classification for one page
 * file and appends the resulting `PageInfo` to `pages`.
 *
 * @param {string} fullPath - Path of the page file on disk; used for reading the title and mtime.
 * @param {string} relativePath - Path relative to the scan root; used for route and classification.
 * @param {PageInfo[]} pages - Accumulator; mutated in place.
 * @returns {void}
 * @throws {Error} Propagates errors from `getLastModifiedTime` (file stat).
 */
function processPageFile(fullPath, relativePath, pages) {
  const route = convertPathToRoute(relativePath)

  pages.push(
    new PageInfo(
      fullPath,
      route,
      `/${route}`,
      // Fall back to the route when the file exposes no recognisable title.
      extractPageTitle(fullPath) || route,
      calculatePagePriority(route),
      calculateChangeFrequency(route),
      getLastModifiedTime(fullPath),
      isDynamicRoute(relativePath),
      extractDynamicParams(relativePath),
      getContentType(relativePath)
    )
  )
}
|
|
|
|
/**
 * Convert file path to Next.js route.
 *
 * Steps, in order:
 *  1. normalise backslashes to forward slashes;
 *  2. drop the trailing `page.(tsx|ts|jsx|js)` file name;
 *  3. drop empty and `.` segments, so the result has no leading or trailing
 *     slash and matches keys such as "about-us";
 *  4. drop route-group segments like `(marketing)`, which Next.js omits from
 *     the URL path;
 *  5. rewrite dynamic segments `[param]` to `:param`.
 *
 * @param {string} filePath - Page file path relative to the app directory.
 * @returns {string} The route without surrounding slashes; "" for the root page.
 */
function convertPathToRoute(filePath) {
  const segments = filePath
    .replace(/\\/g, "/")
    // Anchored to a segment start so a folder such as "homepage" is untouched.
    .replace(/(^|\/)page\.(tsx|ts|jsx|js)$/, "")
    .split("/")
    .filter(
      (segment) =>
        segment !== "" && segment !== "." && !/^\(.*\)$/.test(segment)
    )

  return segments.join("/").replace(/\[(\w+)\]/g, ":$1")
}
|
|
|
|
/**
 * Extract page title from file content.
 *
 * Reads the file and returns the first string literal following `title:` in
 * one of these places, tried in order:
 *  1. the body of the default-exported function component;
 *  2. an exported `generateMetadata` function (with or without `async`,
 *     parameters or a return type annotation);
 *  3. an exported `metadata` constant.
 *
 * The patterns are textual heuristics: each stops at the first closing brace
 * after the opening one, so a title placed after a nested object is missed.
 *
 * @param {string} filePath - Path of the page file to inspect.
 * @returns {string|null} The title, or null when none is found or the file cannot be read.
 */
function extractPageTitle(filePath) {
  const titlePatterns = [
    /export\s+default\s+function\s+\w*\s*\([^)]*\)\s*{[^}]*title:\s*['"`]([^'"`]+)['"`]/,
    /export\s+(?:async\s+)?function\s+generateMetadata\s*\([^)]*\)\s*(?::[^{]*)?{[^}]*title:\s*['"`]([^'"`]+)['"`]/,
    /export\s+const\s+metadata\b[^=]*=\s*{[^}]*title:\s*['"`]([^'"`]+)['"`]/,
  ]

  let content
  try {
    content = fs.readFileSync(filePath, "utf8")
  } catch (error) {
    // Deliberate best effort: an unreadable file simply has no title.
    return null
  }

  for (const pattern of titlePatterns) {
    const match = pattern.exec(content)
    if (match) {
      return match[1]
    }
  }

  return null
}
|
|
|
|
/**
 * Calculate page priority based on route.
 *
 * Lookup order:
 *  1. exact match against the known sections (home page is "");
 *  2. child of a known section ("services/repair"): the section priority
 *     minus 0.1, never below 0.1;
 *  3. otherwise the default 0.5.
 *
 * Leading and trailing slashes on `route` are ignored, so "about-us" and
 * "about-us/" are treated alike.
 *
 * @param {string} route - Route of the page.
 * @returns {number} Sitemap priority between 0.1 and 1.0.
 */
function calculatePagePriority(route) {
  // A Map keeps inherited object keys such as "constructor" from matching.
  const sectionPriorities = new Map([
    ["", 1.0], // Home page
    ["about-us", 0.9],
    ["contact-us", 0.9],
    ["services", 0.9],
    ["vending-machines", 0.9],
    ["manuals", 0.8],
  ])

  const normalizedRoute = route.replace(/^\/+|\/+$/g, "")

  if (sectionPriorities.has(normalizedRoute)) {
    return sectionPriorities.get(normalizedRoute)
  }

  for (const [section, priority] of sectionPriorities) {
    // The home key "" is a prefix of every route and must not count as a
    // parent section; the "/" keeps "servicesx" from matching "services".
    if (section !== "" && normalizedRoute.startsWith(`${section}/`)) {
      // Rounded to one decimal so 0.8 - 0.1 yields 0.7, not 0.7000000000000001.
      return Math.max(0.1, Math.round((priority - 0.1) * 10) / 10)
    }
  }

  return 0.5
}
|
|
|
|
/**
 * Calculate change frequency based on route type.
 *
 * Routes starting with "services" or "vending-machines" change weekly,
 * routes starting with "manuals" or "blog" change daily, and everything else
 * (including the home page, "about-us" and "contact-us") changes monthly.
 * Matching is a plain string-prefix test.
 *
 * @param {string} route - Route of the page.
 * @returns {"daily"|"weekly"|"monthly"} Sitemap change frequency.
 */
function calculateChangeFrequency(route) {
  const prefixRules = [
    [["services", "vending-machines"], "weekly"],
    [["manuals", "blog"], "daily"],
  ]

  for (const [prefixes, frequency] of prefixRules) {
    if (prefixes.some((prefix) => route.startsWith(prefix))) {
      return frequency
    }
  }

  // Home, "about-us" and "contact-us" need no rule of their own: none of
  // them matches a prefix above, so they land on the default.
  return "monthly"
}
|
|
|
|
/**
 * Check if route is dynamic.
 *
 * A path is dynamic when it contains a bracketed, non-empty segment name such
 * as `[id]`, `[...slug]` or `[[...slug]]`. The brackets must open and close
 * in that order without a path separator in between, so stray brackets such
 * as "]x[" or an empty "[]" do not count.
 *
 * @param {string} filePath - Page file path relative to the app directory.
 * @returns {boolean} True when the path contains a dynamic segment.
 */
function isDynamicRoute(filePath) {
  return /\[[^\]\\/]+\]/.test(filePath)
}
|
|
|
|
/**
 * Extract dynamic parameters from route.
 *
 * Returns the parameter names of every dynamic segment in path order.
 * Recognises plain segments (`[id]`), catch-all segments (`[...slug]`) and
 * optional catch-all segments (`[[...slug]]`); the brackets and the `...`
 * marker are not part of the returned name.
 *
 * @param {string} filePath - Page file path relative to the app directory.
 * @returns {string[]} Parameter names; empty when the path has no dynamic segment.
 */
function extractDynamicParams(filePath) {
  // A fresh /g regex per call: exec() keeps state in lastIndex.
  const segmentPattern = /\[{1,2}(?:\.{3})?(\w+)\]{1,2}/g
  const params = []

  let match
  while ((match = segmentPattern.exec(filePath)) !== null) {
    params.push(match[1])
  }

  return params
}
|
|
|
|
/**
 * Get last modified time of file.
 *
 * @param {string} filePath - Path of the file to stat.
 * @returns {Date} The file's modification time (`mtime`) as reported by `fs.statSync`.
 * @throws {Error} Propagates the error from `fs.statSync`, e.g. when the file does not exist.
 */
function getLastModifiedTime(filePath) {
  return fs.statSync(filePath).mtime
}
|
|
|
|
/**
 * Determine content type based on path.
 *
 * Classification works on whole path segments rather than substrings, so
 * names that merely contain "api" or "layout" ("capital-equipment",
 * "layouts-gallery") are not misclassified:
 *  - "api" when any segment is exactly `api`;
 *  - "layout" when the file name is `layout.<ext>`;
 *  - "page" otherwise.
 *
 * @param {string} filePath - File path; both "/" and "\" separators are accepted.
 * @returns {"api"|"layout"|"page"} The content classification.
 */
function getContentType(filePath) {
  const segments = filePath.split(/[\\/]+/).filter(Boolean)

  if (segments.includes("api")) {
    return "api"
  }

  const fileName = segments.length > 0 ? segments[segments.length - 1] : ""
  if (/^layout\.\w+$/.test(fileName)) {
    return "layout"
  }

  return "page"
}
|
|
|
|
/**
 * Analyze internal links in pages.
 *
 * For every entry of `contentByRoute`, extracts the links, keeps the internal
 * ones and resolves each against the known page routes:
 *  - a resolved link is recorded as `{ from, to, text }`, where `to` is the
 *    matching page's `route`;
 *  - an unresolved link is recorded as a broken link, "from -> href".
 *
 * Resolution ignores leading and trailing slashes, and a concrete path such
 * as "blog/hello" resolves to a dynamic route such as "blog/:slug".
 *
 * @param {PageInfo[]} pages - Known pages; only `route` and `contentType` are read.
 * @param {Object<string, string>} [contentByRoute] - Page source keyed by route; a missing value is treated as empty.
 * @returns {SEOAnalysis} Totals, orphaned routes (no inbound link; the home
 *   route is never reported), broken links, per-page issues, the average
 *   number of resolved links per page that has at least one, and page counts
 *   per content type.
 */
export function analyzeInternalLinks(pages, contentByRoute) {
  const contents = contentByRoute || {}
  const resolveRoute = createRouteResolver(pages)

  const internalLinks = []
  const brokenLinks = []
  const linkedRoutes = new Set()
  const linkCounts = new Map()

  for (const [route, content] of Object.entries(contents)) {
    for (const link of extractLinksFromContent(content)) {
      if (!isInternalLink(link.href)) {
        continue
      }

      const targetRoute = resolveRoute(convertLinkToRoute(link.href))
      if (targetRoute === null) {
        brokenLinks.push(`${route} -> ${link.href}`)
        continue
      }

      internalLinks.push({ from: route, to: targetRoute, text: link.text })
      linkedRoutes.add(targetRoute)
      linkCounts.set(route, (linkCounts.get(route) || 0) + 1)
    }
  }

  // Orphaned pages: no inbound link. The home page is the entry point and is
  // never reported.
  const orphanedPages = pages
    .filter(
      (page) =>
        page.route.replace(/^\/+|\/+$/g, "") !== "" &&
        !linkedRoutes.has(page.route)
    )
    .map((page) => page.route)

  const pagesWithIssues = Object.entries(analyzePageIssues(pages, contents))
    .filter(([, issues]) => issues.length > 0)
    .map(([route, issues]) => ({ url: route, issues }))

  // The denominator is the number of pages with at least one resolved
  // internal link, not the total number of pages.
  const averageLinkDensity =
    linkCounts.size > 0 ? internalLinks.length / linkCounts.size : 0

  const pageTypes = {}
  for (const page of pages) {
    pageTypes[page.contentType] = (pageTypes[page.contentType] || 0) + 1
  }

  return new SEOAnalysis(
    pages.length,
    internalLinks.length,
    orphanedPages,
    brokenLinks,
    pagesWithIssues,
    averageLinkDensity,
    pageTypes
  )
}

/**
 * Build a lookup that maps a link target onto the route of a known page.
 *
 * Exact matches (ignoring surrounding slashes) win; otherwise the first
 * dynamic route whose `:param` segments each match one path segment is used.
 *
 * @param {PageInfo[]} pages - Known pages; only `route` is read.
 * @returns {function(string): (string|null)} Resolver returning the matching
 *   page's `route`, or null when no page matches.
 */
function createRouteResolver(pages) {
  const trimSlashes = (value) => value.replace(/^\/+|\/+$/g, "")
  const exactRoutes = new Map()
  const dynamicRoutes = []

  for (const page of pages) {
    const normalized = trimSlashes(page.route)
    if (!exactRoutes.has(normalized)) {
      exactRoutes.set(normalized, page.route)
    }

    if (/(^|\/):\w+/.test(normalized)) {
      const source = normalized
        .split("/")
        .map((segment) =>
          /^:\w+$/.test(segment)
            ? "[^/]+"
            : segment.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")
        )
        .join("/")
      dynamicRoutes.push({
        route: page.route,
        pattern: new RegExp(`^${source}$`),
      })
    }
  }

  return (target) => {
    const normalized = trimSlashes(target)
    if (exactRoutes.has(normalized)) {
      return exactRoutes.get(normalized)
    }

    const hit = dynamicRoutes.find((entry) => entry.pattern.test(normalized))
    return hit ? hit.route : null
  }
}
|
|
|
|
/**
 * Extract links from content.
 *
 * Finds `<a>` and `<Link>` elements that carry an `href` written as a
 * double-quoted or single-quoted string, optionally wrapped in JSX braces
 * (`href={"/x"}`). Nested markup inside the element is stripped from the
 * reported text. Self-closing elements and hrefs built from expressions or
 * template literals are not reported.
 *
 * Limitation: this is a textual scan; an attribute value containing ">"
 * (for example an inline arrow function) ends the opening tag early.
 *
 * @param {string} content - Page source or HTML to scan.
 * @returns {{href: string, text: string}[]} Links in document order; empty for non-string input.
 */
function extractLinksFromContent(content) {
  if (typeof content !== "string") {
    return []
  }

  // Groups: 1 = tag name, 2 = double-quoted href, 3 = single-quoted href,
  // 4 = inner content. The opening tag may not end in "/" (self-closing),
  // otherwise the lazy body would run on into a later element.
  const linkPattern =
    /<(a|Link)\b[^>]*?\shref=\{?(?:"([^"]+)"|'([^']+)')\}?(?:[^>]*[^>/])?>([\s\S]*?)<\/\1>/g
  const links = []

  let match
  while ((match = linkPattern.exec(content)) !== null) {
    links.push({
      href: match[2] !== undefined ? match[2] : match[3],
      text: match[4].replace(/<[^>]*>/g, "").trim(),
    })
  }

  return links
}
|
|
|
|
/**
 * Check if link is internal.
 *
 * A link is internal when it points at a path on the same site. The following
 * are not internal:
 *  - anything with a URI scheme (`https:`, `mailto:`, `tel:`, `javascript:`, ...);
 *  - protocol-relative URLs (`//host/path`);
 *  - fragment-only links (`#top`), which stay on the current page and have no
 *    target route;
 *  - empty or non-string values.
 *
 * @param {string} href - The link target as written in the markup.
 * @returns {boolean} True when the link targets a path on the same site.
 */
function isInternalLink(href) {
  if (typeof href !== "string") {
    return false
  }

  const target = href.trim()
  if (target === "" || target.startsWith("#") || target.startsWith("//")) {
    return false
  }

  // Scheme syntax per RFC 3986: a letter followed by letters, digits, "+",
  // "-" or ".", then ":". A relative path like "httpd-guide" has no colon and
  // stays internal.
  return !/^[a-z][a-z0-9+.-]*:/i.test(target)
}
|
|
|
|
/**
 * Convert link to route.
 *
 * Applies three rewrites, in order:
 *  1. removes one leading slash;
 *  2. cuts the value at the first "?" or "#" (query string and fragment);
 *  3. replaces every `:name` placeholder with `*`.
 *
 * A trailing slash is kept as written.
 *
 * NOTE(review): step 3 yields `*` while page routes spell dynamic segments
 * as `:name` — confirm which form callers are expected to compare against.
 *
 * @param {string} href - Internal link target, e.g. "/about?ref=nav#team".
 * @returns {string} The route form of the link; "" for the site root.
 */
function convertLinkToRoute(href) {
  return href
    .replace(/^\//, "")
    .split(/[?#]/)[0]
    .replace(/:\w+/g, "*")
}
|
|
|
|
/**
 * Analyze individual page issues.
 *
 * Checks each page's source for:
 *  - "Missing title": neither `title:` nor a `Head>` tag occurs in the text;
 *  - "Missing description": neither `description:` nor a `Head>` tag occurs;
 *  - "Too many internal links (N)": more than 10 links were extracted.
 *
 * NOTE(review): the link count covers every extracted link, not only the
 * internal ones the message names — confirm which is intended.
 *
 * @param {PageInfo[]} pages - Not read by this function; kept for signature stability.
 * @param {Object<string, string>} contentByRoute - Page source keyed by route; a missing value is treated as empty.
 * @returns {Object<string, string[]>} Issues keyed by route; routes without issues are omitted.
 */
function analyzePageIssues(pages, contentByRoute) {
  const MAX_LINKS_PER_PAGE = 10
  const issues = {}

  for (const [route, content] of Object.entries(contentByRoute || {})) {
    const pageIssues = []
    // A <Head> block is taken as evidence that both title and description
    // are handled there.
    const hasHead = /Head>/i.test(content)

    if (!hasHead && !/title:/i.test(content)) {
      pageIssues.push("Missing title")
    }

    if (!hasHead && !/description:/i.test(content)) {
      pageIssues.push("Missing description")
    }

    const linkCount = extractLinksFromContent(content).length
    if (linkCount > MAX_LINKS_PER_PAGE) {
      pageIssues.push(`Too many internal links (${linkCount})`)
    }

    if (pageIssues.length > 0) {
      issues[route] = pageIssues
    }
  }

  return issues
}
|
|
|
|
/**
 * Generate sitemap XML from pages.
 *
 * Emits one `<url>` entry per page with `<loc>`, `<lastmod>`, `<changefreq>`
 * and `<priority>`. All interpolated text is XML-escaped.
 *
 * - `<loc>` is `baseUrl` (trailing slashes removed) followed by `page.url`.
 *   The sitemap protocol requires absolute URLs, so pass the site origin
 *   unless `page.url` is already absolute.
 * - `<lastmod>` is the page's own `lastModified`; the generation time is used
 *   only when that value is missing or not a valid date.
 * - `<priority>` falls back to 0.5 when `page.priority` is not a finite number.
 *
 * NOTE(review): pages whose `url` still holds a `:param` placeholder are
 * emitted as-is — confirm whether callers expand or filter them first.
 *
 * @param {PageInfo[]} pages - Pages to list; `url`, `lastModified`, `changeFrequency` and `priority` are read.
 * @param {string} [baseUrl=""] - Site origin to prefix every URL with, e.g. "https://example.com".
 * @returns {string} The sitemap document.
 */
export function generateSitemapXml(pages, baseUrl = "") {
  const escapeXml = (value) =>
    String(value)
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/"/g, "&quot;")
      .replace(/'/g, "&apos;")

  const origin = String(baseUrl).replace(/\/+$/, "")
  const generatedAt = new Date().toISOString()

  let xml = '<?xml version="1.0" encoding="UTF-8"?>\n'
  xml += '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n'

  for (const page of pages) {
    const modified = page.lastModified ? new Date(page.lastModified) : null
    const lastmod =
      modified && !Number.isNaN(modified.getTime())
        ? modified.toISOString()
        : generatedAt
    const priority = Number.isFinite(page.priority) ? page.priority : 0.5

    xml += " <url>\n"
    xml += ` <loc>${escapeXml(`${origin}${page.url}`)}</loc>\n`
    xml += ` <lastmod>${lastmod}</lastmod>\n`
    xml += ` <changefreq>${escapeXml(page.changeFrequency)}</changefreq>\n`
    xml += ` <priority>${priority.toFixed(1)}</priority>\n`
    xml += " </url>\n"
  }

  xml += "</urlset>"
  return xml
}
|