// Rocky_Mountain_Vending/lib/seo-utils.ts
//
// (file-viewer metadata: 469 lines, 11 KiB, TypeScript)

import fs from "fs"
import path from "path"
/**
 * Everything recorded about a single page file found while scanning the
 * app directory.
 */
export interface PageInfo {
  /** Path of the page file on disk, built by joining the scanned directory with the entry names. */
  filePath: string
  /** Route derived from the file's path relative to the scanned root. */
  route: string
  /** The route prefixed with a leading "/". */
  url: string
  /** Title found in the page source; the route is used when none is found. */
  title: string
  /** Priority computed from the route (written with one decimal in the sitemap). */
  priority: number
  /** Change-frequency hint computed from the route. */
  changeFrequency:
    | "always" | "hourly" | "daily" | "weekly"
    | "monthly" | "yearly" | "never"
  /** Modification time of the page file as reported by the file system. */
  lastModified: Date
  /** Whether the relative file path contains bracketed segments. */
  isDynamic: boolean
  /** Names taken from the bracketed segments of the relative file path. */
  dynamicParams?: Array<string>
  /** Classification of the file derived from its relative path. */
  contentType: "page" | "layout" | "api"
}
/**
 * Description of a dynamic route.
 *
 * NOTE(review): nothing in the visible part of this file reads or creates
 * this type, so the field notes below are inferred from the names only —
 * confirm against the callers before relying on them.
 */
export interface DynamicRouteInfo {
  /** Presumably the route pattern containing the dynamic segment — TODO confirm. */
  pattern: string
  /** Presumably the name of the dynamic parameter — TODO confirm. */
  paramName: string
  /** Presumably a sample value or sample URL for the pattern — TODO confirm. */
  example: string
}
/**
 * Result of the internal-link analysis.
 */
export interface SEOAnalysis {
  /** Number of internal links that resolved to a known page (currently the same value as `internalLinks`). */
  totalLinks: number
  /** Number of pages that were passed in for analysis. */
  totalPages: number
  /** Number of internal links that resolved to a known page. */
  internalLinks: number
  /** Routes of pages that no resolved internal link points to (the "" home route is never listed). */
  orphanedPages: Array<string>
  /** One `"<source route> -> <href>"` entry per internal link whose target is not a known page. */
  brokenLinks: Array<string>
  /** Per-page findings; only pages with at least one issue are listed. */
  pagesWithIssues: Array<{
    url: string
    issues: string[]
  }>
  /** Resolved internal links divided by the number of source routes that have at least one such link. */
  averageLinkDensity: number
  /** Number of pages per content type. */
  pageTypes: Record<string, number>
}
/**
 * Collects every page file below `appDir` and returns them ordered by route.
 *
 * @param appDir - Directory to scan; it must exist.
 * @returns One entry per page file, sorted with `localeCompare` on the route.
 * @throws Error when `appDir` does not exist.
 */
export function discoverPages(appDir: string): PageInfo[] {
  // Fail fast before touching the file system any further.
  if (!fs.existsSync(appDir)) {
    throw new Error(`App directory not found: ${appDir}`)
  }

  const discovered: PageInfo[] = []
  // The scan starts with an empty relative path, i.e. at the route root.
  scanDirectory(appDir, "", discovered)

  // Sorting in place is safe: the array was created in this function.
  discovered.sort((left, right) => left.route.localeCompare(right.route))
  return discovered
}
/**
 * Walks `dir` depth-first and records every page file it finds.
 *
 * @param dir - Directory currently being read.
 * @param relativePath - Path of `dir` relative to the scan root ("" for the root itself).
 * @param pages - Accumulator that receives one entry per page file.
 */
function scanDirectory(
  dir: string,
  relativePath: string,
  pages: PageInfo[]
): void {
  // Directory names that are never descended into.
  const skippedDirectories = ["api", "lib", "components", "hooks"]
  const entries = fs.readdirSync(dir, { withFileTypes: true })

  entries.forEach((entry) => {
    const entryPath = path.join(dir, entry.name)
    const entryRelativePath =
      relativePath === "" ? entry.name : path.join(relativePath, entry.name)

    if (entry.isDirectory()) {
      if (!skippedDirectories.includes(entry.name)) {
        scanDirectory(entryPath, entryRelativePath, pages)
      }
      return
    }

    // Entries that are neither directories nor regular files are ignored.
    if (entry.isFile() && isPageFile(entry.name)) {
      processPageFile(entryPath, entryRelativePath, pages)
    }
  })
}
/**
 * Tells whether a file name is one of the recognised page entry files.
 *
 * @param filename - Bare file name without any directory part.
 * @returns `true` only for `page.tsx`, `page.ts`, `page.jsx` and `page.js`.
 */
function isPageFile(filename: string): boolean {
  // The match is exact and case-sensitive.
  const pageFileNames = ["page.tsx", "page.ts", "page.jsx", "page.js"]
  return pageFileNames.includes(filename)
}
/**
 * Builds the `PageInfo` record for one page file and appends it to `pages`.
 *
 * @param fullPath - Path used to read the file's content and modification time.
 * @param relativePath - Path relative to the scan root; the route, the dynamic
 *   flags and the content type are all derived from it.
 * @param pages - Accumulator that receives the new record.
 */
function processPageFile(
  fullPath: string,
  relativePath: string,
  pages: PageInfo[]
): void {
  const route = convertPathToRoute(relativePath)

  const page: PageInfo = {
    filePath: fullPath,
    route,
    url: `/${route}`,
    // Falls back to the route when no title is found (or the title is empty).
    title: extractPageTitle(fullPath) || route,
    priority: calculatePagePriority(route),
    changeFrequency: calculateChangeFrequency(route),
    lastModified: getLastModifiedTime(fullPath),
    isDynamic: isDynamicRoute(relativePath),
    dynamicParams: extractDynamicParams(relativePath),
    contentType: getContentType(relativePath),
  }

  pages.push(page)
}
/**
 * Converts a page file path (relative to the app directory) into a route.
 *
 * The result never has a leading or trailing slash, so it can be compared
 * directly with slash-free route keys such as "about-us".
 *
 * - Both "/" and "\" are accepted as separators.
 * - A trailing `page.(tsx|ts|jsx|js)` file name is dropped.
 * - Empty and "." segments are dropped, so "./page.tsx" is the root route.
 * - Next.js route-group folders such as `(marketing)` are dropped because
 *   they are not part of the URL.
 * - `[param]` becomes `:param`. Catch-all segments such as `[...slug]` are
 *   left untouched.
 *
 * @param filePath - Relative path of the page file, e.g. "about-us/page.tsx".
 * @returns The route, or "" for the root page.
 */
function convertPathToRoute(filePath: string): string {
  const segments = filePath
    .replace(/\\/g, "/")
    .split("/")
    .filter((segment) => segment !== "" && segment !== ".")

  // Drop the page file itself; only the directories form the route.
  const lastSegment = segments[segments.length - 1]
  if (
    lastSegment !== undefined &&
    /^page\.(tsx|ts|jsx|js)$/.test(lastSegment)
  ) {
    segments.pop()
  }

  return (
    segments
      // Only folders fully wrapped in parentheses are route groups; this
      // leaves interception markers such as "(.)photo" alone.
      .filter((segment) => !/^\(.*\)$/.test(segment))
      .map((segment) => segment.replace(/\[(\w+)\]/g, ":$1"))
      .join("/")
  )
}
/**
 * Reads a page file and tries to find a literal page title in its source.
 *
 * The patterns are tried in order and the first match wins:
 * 1. a `title: "..."` inside the default-exported function,
 * 2. a `title: "..."` inside a parameterless `generateMetadata()`,
 * 3. a `title: "..."` inside `export const metadata = { ... }`,
 * 4. a `title: "..."` inside a `generateMetadata` that takes parameters
 *    and/or declares a return type.
 *
 * This is a text heuristic, not a parser. Only quoted or back-ticked literals
 * are recognised, and each pattern stops at the first closing brace.
 *
 * @param filePath - Path of the page file to read.
 * @returns The title text, or `null` when none is found or the file cannot be read.
 */
function extractPageTitle(filePath: string): string | null {
  let content: string
  try {
    content = fs.readFileSync(filePath, "utf8")
  } catch {
    // Best effort by design: an unreadable file simply has no title.
    return null
  }

  const titlePatterns: RegExp[] = [
    /export\s+default\s+function\s+\w*\s*\([^)]*\)\s*{[^}]*title:\s*['"`]([^'"`]+)['"`]/,
    /export\s+async\s+function\s+generateMetadata\(\)\s*{[^}]*title:\s*['"`]([^'"`]+)['"`]/,
    // Static metadata object, with or without a type annotation.
    /export\s+const\s+metadata\b[^=]*=\s*{[^}]*?\btitle:\s*['"`]([^'"`]+)['"`]/,
    // generateMetadata with parameters and/or a return type annotation.
    /export\s+(?:async\s+)?function\s+generateMetadata\s*\([^)]*\)[^{]*{[^}]*?\btitle:\s*['"`]([^'"`]+)['"`]/,
  ]

  for (const pattern of titlePatterns) {
    const match = pattern.exec(content)
    if (match) {
      return match[1] ?? null
    }
  }
  return null
}
/**
 * Computes the sitemap priority for a route.
 *
 * - A route listed in the table gets the listed priority.
 * - A route nested below a listed section (e.g. "services/repair") gets that
 *   section's priority minus 0.1, never lower than 0.1. When several sections
 *   match, the longest one wins.
 * - Every other route gets 0.5.
 *
 * Leading and trailing slashes are ignored. The "" home entry only ever
 * matches exactly; it is not treated as the parent of every route.
 *
 * @param route - Route without the leading slash, e.g. "about-us".
 * @returns A priority between 0.1 and 1.0.
 */
function calculatePagePriority(route: string): number {
  // A Map avoids accidental hits on Object.prototype members.
  const priorities = new Map<string, number>([
    ["", 1.0], // Home page
    ["about-us", 0.9],
    ["contact-us", 0.9],
    ["services", 0.9],
    ["vending-machines", 0.9],
    ["manuals", 0.8],
  ])

  const normalized = route.replace(/^\/+|\/+$/g, "")

  // Exact match first.
  const exact = priorities.get(normalized)
  if (exact !== undefined) {
    return exact
  }

  // Otherwise look for the most specific listed section the route is under.
  let parentPriority: number | undefined
  let parentLength = 0
  for (const [section, value] of priorities) {
    if (section === "") {
      // The empty key is a prefix of everything and would shadow the default.
      continue
    }
    // Match on a segment boundary so "services-archive" is not under "services".
    if (normalized.startsWith(`${section}/`) && section.length > parentLength) {
      parentPriority = value
      parentLength = section.length
    }
  }

  if (parentPriority !== undefined) {
    // Round to one decimal to avoid float noise such as 0.7000000000000001.
    return Math.max(0.1, Math.round((parentPriority - 0.1) * 10) / 10)
  }

  // Default priority
  return 0.5
}
/**
 * Picks the sitemap change-frequency hint for a route.
 *
 * @param route - Route without the leading slash.
 * @returns "weekly" for routes starting with "services" or "vending-machines",
 *   "daily" for routes starting with "manuals" or "blog", "monthly" otherwise.
 */
function calculateChangeFrequency(
  route: string
): "always" | "hourly" | "daily" | "weekly" | "monthly" | "yearly" | "never" {
  const startsWithAny = (...prefixes: string[]): boolean =>
    prefixes.some((prefix) => route.startsWith(prefix))

  // Home, about and contact pages are named explicitly.
  const staticRoutes = ["", "about-us", "contact-us"]
  if (staticRoutes.includes(route)) {
    return "monthly"
  }

  if (startsWithAny("services", "vending-machines")) {
    return "weekly"
  }

  if (startsWithAny("manuals", "blog")) {
    return "daily"
  }

  return "monthly"
}
/**
 * Reports whether a file path looks like it contains a bracketed segment.
 *
 * @param filePath - Relative path of the page file.
 * @returns `true` when the path contains both "[" and "]", in any position.
 */
function isDynamicRoute(filePath: string): boolean {
  const brackets = ["[", "]"]
  return brackets.every((bracket) => filePath.includes(bracket))
}
/**
 * Lists the parameter names of the dynamic segments in a file path.
 *
 * Recognises plain segments (`[id]`), catch-all segments (`[...slug]`) and
 * optional catch-all segments (`[[...slug]]`); in every case only the bare
 * name is returned.
 *
 * @param filePath - Relative path of the page file.
 * @returns Parameter names in order of appearance; empty when there are none.
 */
function extractDynamicParams(filePath: string): string[] {
  // One or two brackets on each side, with an optional "..." before the name.
  const segmentPattern = /\[{1,2}(?:\.\.\.)?(\w+)\]{1,2}/g
  const params: string[] = []

  let match: RegExpExecArray | null
  while ((match = segmentPattern.exec(filePath)) !== null) {
    const name = match[1]
    if (name !== undefined) {
      params.push(name)
    }
  }
  return params
}
/**
 * Returns the modification time the file system reports for a file.
 *
 * @param filePath - Path of the file to inspect.
 * @returns The file's `mtime`.
 * @throws Whatever `fs.statSync` throws, e.g. when the file does not exist.
 */
function getLastModifiedTime(filePath: string): Date {
  const { mtime } = fs.statSync(filePath)
  return mtime
}
/**
 * Classifies a file by its path.
 *
 * The path is compared segment by segment, so folder names that merely
 * contain "api" or "layout" (for example "capital" or "layouts-gallery") are
 * not misclassified.
 *
 * @param filePath - Relative path of the file; "/" and "\" are both accepted.
 * @returns "api" when any segment is exactly "api"; "layout" when the file
 *   name is `layout.(tsx|ts|jsx|js)`; "page" otherwise.
 */
function getContentType(filePath: string): "page" | "layout" | "api" {
  const segments = filePath
    .split(/[\\/]+/)
    .filter((segment) => segment !== "")

  if (segments.includes("api")) {
    return "api"
  }

  const fileName = segments[segments.length - 1] ?? ""
  if (/^layout\.(tsx|ts|jsx|js)$/.test(fileName)) {
    return "layout"
  }

  return "page"
}
/**
 * Analyses how pages link to each other.
 *
 * For every entry of `contentByRoute` the anchors are extracted, external
 * links are ignored, and every internal link is resolved against `pages`:
 *
 * - Routes are compared without leading or trailing slashes.
 * - A link that matches no page exactly is also tried against pages whose
 *   route contains ":param" placeholders; each placeholder matches one
 *   non-empty run of characters without "/". This lets a link such as
 *   "/products/combo" resolve to the page "products/:id".
 * - Links that resolve nowhere are reported as broken.
 *
 * @param pages - Known pages; their `route` values are the link targets.
 * @param contentByRoute - Markup to scan, keyed by the route it belongs to.
 * @returns Link counts, orphaned pages (never the "" home route), broken
 *   links, per-page issues, link density and the number of pages per content
 *   type. `averageLinkDensity` divides by the number of source routes that
 *   have at least one resolved internal link, not by all pages.
 */
export function analyzeInternalLinks(
  pages: PageInfo[],
  contentByRoute: Record<string, string>
): SEOAnalysis {
  const normalizeRoute = (route: string): string =>
    route.replace(/^\/+|\/+$/g, "")
  const escapeRegExp = (text: string): string =>
    text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")

  // Normalised route -> route exactly as stored on the page (first one wins).
  const routeByKey = new Map<string, string>()
  // One matcher per page whose route contains ":param" placeholders.
  const dynamicMatchers: { route: string; pattern: RegExp }[] = []
  for (const page of pages) {
    const key = normalizeRoute(page.route)
    if (!routeByKey.has(key)) {
      routeByKey.set(key, page.route)
    }
    if (/:\w+/.test(key)) {
      const source = key.split(/:\w+/).map(escapeRegExp).join("[^/]+")
      dynamicMatchers.push({
        route: page.route,
        pattern: new RegExp(`^${source}$`),
      })
    }
  }

  // Resolves a link target to the route of the page it points at, if any.
  const resolveTarget = (targetRoute: string): string | undefined => {
    const key = normalizeRoute(targetRoute)
    const exact = routeByKey.get(key)
    if (exact !== undefined) {
      return exact
    }
    return dynamicMatchers.find((matcher) => matcher.pattern.test(key))?.route
  }

  const brokenLinks: string[] = []
  const linkedRoutes = new Set<string>()
  const outboundCounts = new Map<string, number>()
  let internalLinkCount = 0

  // Find pages that link to other pages
  for (const [route, content] of Object.entries(contentByRoute)) {
    for (const link of extractLinksFromContent(content)) {
      if (!isInternalLink(link.href)) {
        continue
      }
      const target = resolveTarget(convertLinkToRoute(link.href))
      if (target === undefined) {
        brokenLinks.push(`${route} -> ${link.href}`)
        continue
      }
      internalLinkCount += 1
      linkedRoutes.add(target)
      outboundCounts.set(route, (outboundCounts.get(route) ?? 0) + 1)
    }
  }

  // Find orphaned pages (pages with no inbound links); the home page is exempt.
  const orphanedPages = pages
    .filter((page) => page.route !== "" && !linkedRoutes.has(page.route))
    .map((page) => page.route)

  // Analyze pages with issues
  const pagesWithIssues: { url: string; issues: string[] }[] = []
  for (const [route, issues] of Object.entries(
    analyzePageIssues(pages, contentByRoute)
  )) {
    if (issues.length > 0) {
      pagesWithIssues.push({ url: route, issues })
    }
  }

  // Calculate average link density
  const linkingPages = outboundCounts.size
  const averageLinkDensity =
    linkingPages > 0 ? internalLinkCount / linkingPages : 0

  // Analyze page types
  const pageTypes: Record<string, number> = {}
  for (const page of pages) {
    pageTypes[page.contentType] = (pageTypes[page.contentType] ?? 0) + 1
  }

  return {
    totalLinks: internalLinkCount,
    totalPages: pages.length,
    internalLinks: internalLinkCount,
    orphanedPages,
    brokenLinks,
    pagesWithIssues,
    averageLinkDensity,
    pageTypes,
  }
}
/**
 * Extracts the anchors (`<a href="...">...</a>`) found in a piece of markup.
 *
 * - Tag and attribute names are matched case-insensitively.
 * - The `href` value may be wrapped in double or single quotes and must not
 *   be empty.
 * - Only a real `href` attribute counts; `data-href` and similar are ignored.
 * - Markup nested inside the anchor is removed from the text, so an
 *   image-only link yields an empty text.
 *
 * This is a regular-expression scan, not an HTML parser. Unquoted attribute
 * values and anchors nested inside anchors are not supported.
 *
 * @param content - Markup to scan.
 * @returns One `{ href, text }` entry per anchor, in document order.
 */
function extractLinksFromContent(
  content: string
): { href: string; text: string }[] {
  const links: { href: string; text: string }[] = []
  // Group 1/2: double-/single-quoted href value; group 3: inner markup.
  const anchorPattern =
    /<a\s+(?:[^>]*?\s)?href\s*=\s*(?:"([^"]+)"|'([^']+)')[^>]*>([\s\S]*?)<\/a\s*>/gi

  let match: RegExpExecArray | null
  while ((match = anchorPattern.exec(content)) !== null) {
    const href = match[1] ?? match[2] ?? ""
    const inner = match[3] ?? ""
    links.push({
      href,
      // Strip nested tags so only the visible text is left.
      text: inner.replace(/<[^>]*>/g, "").trim(),
    })
  }
  return links
}
/**
 * Decides whether an `href` points inside the site.
 *
 * A link counts as external when it carries a URL scheme (`https:`,
 * `mailto:`, `tel:`, `javascript:`, `ftp:`, ... in any letter case) or is
 * protocol-relative (`//host/path`). Everything else is internal: absolute
 * paths, relative paths, and query-only or fragment-only references.
 *
 * NOTE(review): absolute URLs that point at the site's own domain are still
 * reported as external because no base URL is available here.
 *
 * @param href - Raw attribute value.
 * @returns `true` for internal links.
 */
function isInternalLink(href: string): boolean {
  const target = href.trim()

  // Protocol-relative URLs name another host.
  if (target.startsWith("//")) {
    return false
  }

  // Generic URL scheme: a letter, then letters, digits, "+", "." or "-", then ":".
  return !/^[a-z][a-z0-9+.-]*:/i.test(target)
}
/**
 * Reduces an internal `href` to the route it points at.
 *
 * The query string and fragment are removed first, then leading and trailing
 * slashes, so "/about-us", "/about-us/" and "/about-us/?ref=nav#team" all
 * yield "about-us". Any ":name" placeholder in the path is replaced by "*".
 *
 * @param href - Internal link target.
 * @returns The route without surrounding slashes; "" for the home page and
 *   for fragment-only or query-only links.
 */
function convertLinkToRoute(href: string): string {
  // Cut the query/fragment first so they cannot hide a trailing slash.
  const [pathOnly = ""] = href.split(/[?#]/)

  const route = pathOnly.replace(/^\/+|\/+$/g, "")

  // Handle dynamic routes
  return route.replace(/:\w+/g, "*")
}
/**
 * Runs simple content checks on every entry of `contentByRoute`.
 *
 * Checks, per entry:
 * - "Missing title": the content contains neither `title:` nor `Head>`
 *   (both case-insensitive).
 * - "Missing description": the content contains neither `description:` nor
 *   `Head>` (both case-insensitive).
 * - "Too many internal links (N)": more than `maxInternalLinks` of the
 *   extracted anchors are internal. External links are not counted, so the
 *   number matches the message.
 *
 * @param pages - Not used by the current checks; kept so existing callers
 *   keep working.
 * @param contentByRoute - Content to check, keyed by route.
 * @param maxInternalLinks - Highest number of internal links that is still
 *   acceptable. Defaults to 10.
 * @returns Issues keyed by route; routes without issues are omitted.
 */
function analyzePageIssues(
  pages: PageInfo[],
  contentByRoute: Record<string, string>,
  maxInternalLinks = 10
): Record<string, string[]> {
  const issues: Record<string, string[]> = {}

  for (const [route, content] of Object.entries(contentByRoute)) {
    const pageIssues: string[] = []
    // A Head element is taken as evidence that metadata is set elsewhere.
    const hasHead = /Head>/i.test(content)

    // Check for missing title
    if (!hasHead && !/title:/i.test(content)) {
      pageIssues.push("Missing title")
    }

    // Check for missing description
    if (!hasHead && !/description:/i.test(content)) {
      pageIssues.push("Missing description")
    }

    // Check for too many internal links
    const internalLinkCount = extractLinksFromContent(content).filter((link) =>
      isInternalLink(link.href)
    ).length
    if (internalLinkCount > maxInternalLinks) {
      pageIssues.push(`Too many internal links (${internalLinkCount})`)
    }

    if (pageIssues.length > 0) {
      issues[route] = pageIssues
    }
  }

  return issues
}
/**
 * Renders the pages as a sitemap XML document.
 *
 * - `<loc>` is `baseUrl` followed by the page URL, XML-escaped. The sitemap
 *   protocol expects absolute URLs, so callers should pass the site origin.
 *   With the default "" the page URL is written unchanged.
 * - `<lastmod>` is the page's own `lastModified` in ISO 8601 form. The
 *   generation time is used only when that value is not a valid `Date`.
 * - `<priority>` is written with one decimal.
 *
 * NOTE(review): dynamic pages are emitted with their ":param" placeholders
 * still in the URL; callers may want to expand or filter them first.
 *
 * @param pages - Pages to list, in output order.
 * @param baseUrl - Site origin such as "https://example.com"; trailing
 *   slashes are ignored.
 * @returns The XML document, without a trailing newline.
 */
export function generateSitemapXml(pages: PageInfo[], baseUrl = ""): string {
  const escapeXml = (value: string): string =>
    value
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/"/g, "&quot;")
      .replace(/'/g, "&apos;")

  const origin = baseUrl.replace(/\/+$/, "")
  const generatedAt = new Date().toISOString()

  const lines: string[] = [
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
  ]

  for (const page of pages) {
    const modified = page.lastModified
    // toISOString() throws on an invalid Date, so guard before calling it.
    const lastmod =
      modified instanceof Date && !Number.isNaN(modified.getTime())
        ? modified.toISOString()
        : generatedAt
    const loc = escapeXml(origin + page.url)

    lines.push(
      " <url>",
      ` <loc>${loc}</loc>`,
      ` <lastmod>${lastmod}</lastmod>`,
      ` <changefreq>${page.changeFrequency}</changefreq>`,
      ` <priority>${page.priority.toFixed(1)}</priority>`,
      " </url>"
    )
  }

  lines.push("</urlset>")
  return lines.join("\n")
}