// Rocky_Mountain_Vending/lib/seo-utils.ts

import fs from "fs"
import path from "path"

/**
 * Describes a single page file found while scanning the app directory.
 *
 * Records of this shape are assembled by `processPageFile` and returned by
 * `discoverPages`.
 */
export interface PageInfo {
  /** Location of the page file on disk (scanned directory joined with the entry name). */
  filePath: string
  /** Route derived from the file's path relative to the app directory; `""` is the index (home) page. */
  route: string
  /** The route prefixed with a leading `/`. */
  url: string
  /** Title extracted from the file's source, or the route when no title could be extracted. */
  title: string
  /** Priority assigned from the route; written to `<priority>` in the generated sitemap. */
  priority: number
  /** Change frequency assigned from the route; written to `<changefreq>` in the generated sitemap. */
  changeFrequency:
    | "always"
    | "hourly"
    | "daily"
    | "weekly"
    | "monthly"
    | "yearly"
    | "never"
  /** Modification time of the page file as reported by the file system. */
  lastModified: Date
  /** Whether the file's relative path contains bracketed (`[param]`) segments. */
  isDynamic: boolean
  /** Names found inside `[param]` segments of the relative path; empty when there are none. */
  dynamicParams?: string[]
  /** Classification derived from the file's relative path. */
  contentType: "page" | "layout" | "api"
}

/**
 * Dynamic route information.
 *
 * NOTE(review): nothing in the visible part of this file reads or produces
 * this type, so the field notes below are inferred from the names only —
 * confirm against the callers before relying on them.
 */
export interface DynamicRouteInfo {
  /** Presumably the route pattern containing the dynamic segment — TODO confirm. */
  pattern: string
  /** Presumably the name of the dynamic parameter — TODO confirm. */
  paramName: string
  /** Presumably a sample concrete value or URL for the pattern — TODO confirm. */
  example: string
}

/**
 * Result of the internal-link analysis produced by `analyzeInternalLinks`.
 */
export interface SEOAnalysis {
  /** Number of internal links that resolved to a known page (same value as `internalLinks`). */
  totalLinks: number
  /** Number of pages passed to the analysis. */
  totalPages: number
  /** Number of internal links that resolved to a known page. */
  internalLinks: number
  /** Routes of pages (other than the home page `""`) that no analyzed content links to. */
  orphanedPages: string[]
  /** Internal links whose target is not a known page, formatted as `<source route> -> <href>`. */
  brokenLinks: string[]
  /** Per-page findings; `url` holds the key of the analyzed content (its route). */
  pagesWithIssues: {
    url: string
    issues: string[]
  }[]
  /** Resolved internal links divided by the number of source routes that have at least one such link. */
  averageLinkDensity: number
  /** Number of pages per `contentType` value. */
  pageTypes: {
    [key: string]: number
  }
}

/**
 * Collect every page file found beneath the given app directory.
 *
 * @param appDir - Directory to scan recursively.
 * @returns The discovered pages, ordered by route using `localeCompare`.
 * @throws Error when `appDir` does not exist.
 */
export function discoverPages(appDir: string): PageInfo[] {
  if (!fs.existsSync(appDir)) {
    throw new Error(`App directory not found: ${appDir}`)
  }

  // The scanner appends to this array as it walks the tree.
  const discovered: PageInfo[] = []
  scanDirectory(appDir, "", discovered)

  discovered.sort((left, right) => left.route.localeCompare(right.route))
  return discovered
}

/**
 * Walk a directory tree and register every page file it contains.
 *
 * @param dir - Directory currently being read.
 * @param relativePath - Path of `dir` relative to the scan root (`""` at the root).
 * @param pages - Accumulator that receives one entry per page file.
 */
function scanDirectory(
  dir: string,
  relativePath: string,
  pages: PageInfo[]
): void {
  // Directories with these names are never descended into.
  const skippedDirectories = ["api", "lib", "components", "hooks"]

  fs.readdirSync(dir, { withFileTypes: true }).forEach((entry) => {
    const absolutePath = path.join(dir, entry.name)
    const entryRelativePath = relativePath
      ? path.join(relativePath, entry.name)
      : entry.name

    if (entry.isDirectory()) {
      if (!skippedDirectories.includes(entry.name)) {
        scanDirectory(absolutePath, entryRelativePath, pages)
      }
      return
    }

    // Anything that is neither a directory nor a regular file is ignored.
    if (entry.isFile() && isPageFile(entry.name)) {
      processPageFile(absolutePath, entryRelativePath, pages)
    }
  })
}

/**
 * Tell whether a file name is a page entry point.
 *
 * Only the exact names `page.tsx`, `page.ts`, `page.jsx` and `page.js` qualify.
 */
function isPageFile(filename: string): boolean {
  const pageFileNames = new Set(["page.tsx", "page.ts", "page.jsx", "page.js"])
  return pageFileNames.has(filename)
}

/**
 * Build the `PageInfo` record for one page file and append it to `pages`.
 *
 * @param fullPath - Location of the page file on disk.
 * @param relativePath - Path of the file relative to the scan root.
 * @param pages - Accumulator that receives the new record.
 */
function processPageFile(
  fullPath: string,
  relativePath: string,
  pages: PageInfo[]
): void {
  const route = convertPathToRoute(relativePath)

  const page: PageInfo = {
    filePath: fullPath,
    route,
    url: `/${route}`,
    // Fall back to the route when the source yields no title.
    title: extractPageTitle(fullPath) || route,
    priority: calculatePagePriority(route),
    changeFrequency: calculateChangeFrequency(route),
    lastModified: getLastModifiedTime(fullPath),
    isDynamic: isDynamicRoute(relativePath),
    dynamicParams: extractDynamicParams(relativePath),
    contentType: getContentType(relativePath),
  }

  pages.push(page)
}

/**
 * Convert a page file path (relative to the app directory) to a Next.js route.
 *
 * - Path separators are normalised to `/`.
 * - A trailing `page.(tsx|ts|jsx|js)` file name is dropped, and no trailing
 *   slash is left behind, so `about-us/page.tsx` becomes `about-us`.
 * - Route-group folders such as `(marketing)` are omitted, because they do
 *   not contribute to the URL.
 * - Dynamic segments `[param]` become `:param`. Catch-all forms such as
 *   `[...slug]` are left untouched.
 *
 * @param filePath - Relative path of the page file, using `/` or `\` separators.
 * @returns The route without leading or trailing slash; `""` for the index page.
 */
function convertPathToRoute(filePath: string): string {
  // Normalise separators first so the segment handling below is platform independent.
  const segments = filePath.replace(/\\/g, "/").split("/")

  // Drop the page file itself, but only when it is a whole path segment.
  const lastSegment = segments[segments.length - 1]
  if (
    lastSegment !== undefined &&
    /^page\.(tsx|ts|jsx|js)$/.test(lastSegment)
  ) {
    segments.pop()
  }

  return segments
    .filter(
      (segment) =>
        segment !== "" && segment !== "." && !/^\(.+\)$/.test(segment)
    )
    .join("/")
    .replace(/\[(\w+)\]/g, ":$1")
}

/**
 * Extract a page title from the source text of a page file.
 *
 * The file is searched with a list of regular expressions, tried in order;
 * the first one that matches supplies the title:
 *  1. a `title: "..."` literal inside the body of the default-exported function,
 *  2. a `title: "..."` literal inside a parameterless async `generateMetadata()`,
 *  3. a `title: "..."` literal inside a static `export const metadata = { ... }`,
 *  4. a `title: "..."` literal inside `generateMetadata` declared with
 *     parameters and/or a return type annotation.
 *
 * These are textual heuristics: each pattern only looks as far as the first
 * `}` after the opening brace, and only plain quoted/backticked titles are
 * recognised.
 *
 * @param filePath - Location of the page file on disk.
 * @returns The title text, or `null` when none is found or the file cannot be read.
 */
function extractPageTitle(filePath: string): string | null {
  let content: string
  try {
    content = fs.readFileSync(filePath, "utf8")
  } catch {
    // Best effort: an unreadable file simply has no extractable title.
    return null
  }

  const titlePatterns: RegExp[] = [
    /export\s+default\s+function\s+\w*\s*\([^)]*\)\s*{[^}]*title:\s*['"`]([^'"`]+)['"`]/,
    /export\s+async\s+function\s+generateMetadata\(\)\s*{[^}]*title:\s*['"`]([^'"`]+)['"`]/,
    /export\s+const\s+metadata\b[^=]*=\s*\{[^}]*title:\s*['"`]([^'"`]+)['"`]/,
    /export\s+(?:async\s+)?function\s+generateMetadata\s*\([^)]*\)\s*(?::[^{]*)?\{[^}]*title:\s*['"`]([^'"`]+)['"`]/,
  ]

  for (const pattern of titlePatterns) {
    const title = content.match(pattern)?.[1]
    if (title !== undefined) {
      return title
    }
  }

  return null
}

/**
 * Calculate the sitemap priority of a route.
 *
 * - A route that equals one of the known sections gets that section's priority.
 * - A route nested under a known section (`<section>/...`) gets the section's
 *   priority minus 0.1, never below 0.1.
 * - Any other route gets the default of 0.5.
 *
 * Trailing slashes on `route` are ignored. The home entry (`""`) is only
 * matched exactly: treating it as a prefix would match every route and make
 * the default unreachable.
 *
 * @param route - Route without a leading slash; `""` is the home page.
 * @returns A priority between 0.1 and 1.0.
 */
function calculatePagePriority(route: string): number {
  // A tuple list rather than an object, so route names such as "constructor"
  // cannot hit inherited Object.prototype members.
  const sectionPriorities: ReadonlyArray<readonly [string, number]> = [
    ["", 1.0], // Home page
    ["about-us", 0.9],
    ["contact-us", 0.9],
    ["services", 0.9],
    ["vending-machines", 0.9],
    ["manuals", 0.8],
  ]

  const normalizedRoute = route.replace(/\/+$/, "")

  // Check exact match first
  const exact = sectionPriorities.find(
    ([section]) => section === normalizedRoute
  )
  if (exact) {
    return exact[1]
  }

  // Child pages: match on a whole path segment so "services-old" is not
  // mistaken for a child of "services".
  const parent = sectionPriorities.find(
    ([section]) => section !== "" && normalizedRoute.startsWith(`${section}/`)
  )
  if (parent) {
    // Round to one decimal to avoid floating point residue such as 0.7000000000000001.
    return Math.max(0.1, Math.round((parent[1] - 0.1) * 10) / 10)
  }

  // Default priority
  return 0.5
}

/**
 * Pick the sitemap change frequency for a route.
 *
 * The home, about and contact routes are "monthly"; routes starting with
 * `services` or `vending-machines` are "weekly"; routes starting with
 * `manuals` or `blog` are "daily"; everything else is "monthly".
 */
function calculateChangeFrequency(
  route: string
): "always" | "hourly" | "daily" | "weekly" | "monthly" | "yearly" | "never" {
  const startsWithAny = (prefixes: string[]): boolean =>
    prefixes.some((prefix) => route.startsWith(prefix))

  switch (route) {
    case "":
    case "about-us":
    case "contact-us":
      return "monthly"
  }

  if (startsWithAny(["services", "vending-machines"])) {
    return "weekly"
  }

  if (startsWithAny(["manuals", "blog"])) {
    return "daily"
  }

  return "monthly"
}

/**
 * Report whether a file path contains both an opening and a closing square
 * bracket, which is how dynamic route segments are detected here.
 */
function isDynamicRoute(filePath: string): boolean {
  return ["[", "]"].every((bracket) => filePath.includes(bracket))
}

/**
 * List the parameter names of all `[name]` segments in a file path.
 *
 * @returns The names in order of appearance, or an empty array when the path
 * has no such segment.
 */
function extractDynamicParams(filePath: string): string[] {
  const bracketed = filePath.match(/\[(\w+)\]/g)
  if (bracketed === null) {
    return []
  }

  // Each token looks like "[name]"; drop the surrounding brackets.
  return bracketed.map((token) => token.substring(1, token.length - 1))
}

/**
 * Read the modification timestamp of a file from the file system.
 *
 * @throws Whatever `fs.statSync` throws when the file cannot be stat'ed.
 */
function getLastModifiedTime(filePath: string): Date {
  const { mtime } = fs.statSync(filePath)
  return mtime
}

/**
 * Determine the content type of a file from its path.
 *
 * The path is inspected segment by segment rather than by substring, so
 * names that merely contain "api" or "layout" (for example
 * `capital-equipment` or `layout-ideas`) are not misclassified.
 *
 * @param filePath - File path using `/` or `\` separators.
 * @returns `"api"` when any path segment is exactly `api`, `"layout"` when
 * the final segment is a `layout` file (with or without extension),
 * otherwise `"page"`.
 */
function getContentType(filePath: string): "page" | "layout" | "api" {
  const segments = filePath
    .split(/[\\/]+/)
    .filter((segment) => segment.length > 0)

  if (segments.includes("api")) {
    return "api"
  }

  const fileName = segments[segments.length - 1] ?? ""
  if (/^layout(\.[A-Za-z]+)?$/.test(fileName)) {
    return "layout"
  }

  return "page"
}

/**
 * Analyze the internal links between pages.
 *
 * Every link found in `contentByRoute` that `isInternalLink` accepts is
 * resolved against the known page routes:
 *  - routes are compared with trailing slashes removed, so `/about-us` and
 *    `/about-us/` refer to the same page;
 *  - a page route containing `:param` segments matches any link whose
 *    corresponding segment is non-empty (e.g. `blog/:slug` matches
 *    `blog/hello`).
 * Links that resolve are counted as internal links; the rest are reported
 * as broken in the form `<source route> -> <href>`.
 *
 * @param pages - Known pages, typically the result of `discoverPages`.
 * @param contentByRoute - Content to scan for links, keyed by source route.
 * @returns Link totals, orphaned pages (pages other than the home page with
 * no inbound link), broken links, per-page issues, the average number of
 * resolved links per source route that has at least one, and page counts
 * per content type.
 */
export function analyzeInternalLinks(
  pages: PageInfo[],
  contentByRoute: Record<string, string>
): SEOAnalysis {
  const trimTrailingSlashes = (value: string): string =>
    value.replace(/\/+$/, "")

  // Index the known routes once so each link is resolved without rescanning `pages`.
  const knownRoutes = new Set<string>()
  const dynamicRoutes: { key: string; pattern: RegExp }[] = []
  for (const page of pages) {
    const key = trimTrailingSlashes(page.route)
    knownRoutes.add(key)
    if (key.includes(":")) {
      // Escape regex metacharacters, then let each ":param" match one path segment.
      const source = key
        .replace(/[.*+?^${}()|[\]\\]/g, "\\$&")
        .replace(/:\w+/g, "[^/]+")
      dynamicRoutes.push({ key, pattern: new RegExp(`^${source}$`) })
    }
  }

  const resolveTarget = (target: string): string | undefined => {
    const key = trimTrailingSlashes(target)
    if (knownRoutes.has(key)) {
      return key
    }
    return dynamicRoutes.find((candidate) => candidate.pattern.test(key))?.key
  }

  const brokenLinks: string[] = []
  const linkedRoutes = new Set<string>() // normalized routes with at least one inbound link
  const sourcesWithLinks = new Set<string>() // source routes with at least one resolved link
  let internalLinkCount = 0

  // Find pages that link to other pages
  for (const [route, content] of Object.entries(contentByRoute)) {
    for (const link of extractLinksFromContent(content)) {
      if (!isInternalLink(link.href)) {
        continue
      }

      const resolved = resolveTarget(convertLinkToRoute(link.href))
      if (resolved === undefined) {
        brokenLinks.push(`${route} -> ${link.href}`)
        continue
      }

      linkedRoutes.add(resolved)
      sourcesWithLinks.add(route)
      internalLinkCount += 1
    }
  }

  // Find orphaned pages (pages with no inbound links); the home page is exempt.
  const orphanedPages = pages
    .filter((page) => {
      const key = trimTrailingSlashes(page.route)
      return key !== "" && !linkedRoutes.has(key)
    })
    .map((page) => page.route)

  // Analyze pages with issues
  const pagesWithIssues = Object.entries(
    analyzePageIssues(pages, contentByRoute)
  )
    .filter(([, issues]) => issues.length > 0)
    .map(([route, issues]) => ({ url: route, issues }))

  // Average is taken over source routes that contain at least one resolved link.
  const averageLinkDensity =
    sourcesWithLinks.size > 0 ? internalLinkCount / sourcesWithLinks.size : 0

  // Analyze page types
  const pageTypes: Record<string, number> = {}
  for (const page of pages) {
    pageTypes[page.contentType] = (pageTypes[page.contentType] || 0) + 1
  }

  return {
    totalLinks: internalLinkCount,
    totalPages: pages.length,
    internalLinks: internalLinkCount,
    orphanedPages,
    brokenLinks,
    pagesWithIssues,
    averageLinkDensity,
    pageTypes,
  }
}

/**
 * Extract links from HTML or JSX content.
 *
 * Recognises `<a>` and `<Link>` elements whose `href` is a non-empty
 * double- or single-quoted string. The link text is the element's inner
 * content with nested tags removed and surrounding whitespace trimmed, so
 * `<a href="/x"><span>Go</span></a>` yields the text `Go`.
 *
 * Limitations of this regex-based approach: `href={expression}` attributes
 * are not recognised, and attributes containing a literal `>` end the
 * opening tag early.
 *
 * @param content - Markup to scan.
 * @returns One `{ href, text }` entry per link, in document order.
 */
function extractLinksFromContent(
  content: string
): { href: string; text: string }[] {
  const links: { href: string; text: string }[] = []
  // Groups: 1 = tag name, 2 = double-quoted href, 3 = single-quoted href, 4 = inner content.
  // The inner content may not run into another link's opening tag, which keeps
  // a self-closing `<Link ... />` from swallowing the links that follow it.
  const linkRegex =
    /<(a|Link)\b[^>]*?\shref\s*=\s*(?:"([^"]+)"|'([^']+)')[^>]*>((?:(?!<(?:a|Link)\b)[\s\S])*?)<\/\1\s*>/g

  let match: RegExpExecArray | null
  while ((match = linkRegex.exec(content)) !== null) {
    links.push({
      href: match[2] || match[3],
      text: match[4].replace(/<[^>]*>/g, "").trim(),
    })
  }

  return links
}

/**
 * Check whether an href points inside the site.
 *
 * An href is treated as internal when it is a relative reference: it has no
 * URI scheme (`http:`, `https:`, `mailto:`, `tel:`, `javascript:`, ... in
 * any letter case) and is not protocol-relative (`//host/path`). Relative
 * paths that merely start with the letters "http" (e.g. `http-guides`) are
 * internal.
 *
 * @param href - Raw `href` attribute value.
 */
function isInternalLink(href: string): boolean {
  const candidate = href.trim()
  // Scheme syntax: a letter followed by letters, digits, "+", "-" or ".", then ":".
  const hasScheme = /^[a-z][a-z0-9+.-]*:/i.test(candidate)
  const isProtocolRelative = candidate.startsWith("//")
  return !hasScheme && !isProtocolRelative
}

/**
 * Reduce an href to the route form used for page lookups.
 *
 * A single leading slash is dropped, everything from the first `?` or `#`
 * onwards is discarded, and each `:name` token is replaced by `*`.
 */
function convertLinkToRoute(href: string): string {
  const withoutLeadingSlash = href.startsWith("/") ? href.slice(1) : href

  // Cut at the first query or fragment delimiter, if there is one.
  const cutAt = withoutLeadingSlash.search(/[?#]/)
  const pathOnly =
    cutAt === -1 ? withoutLeadingSlash : withoutLeadingSlash.slice(0, cutAt)

  return pathOnly.replace(/:\w+/g, "*")
}

/**
 * Collect per-page SEO findings from raw page content.
 *
 * For each entry of `contentByRoute` the content is checked for a `title:`
 * token, a `description:` token (either is waived when the content contains
 * `Head>`), and for more than 10 extracted links.
 *
 * NOTE(review): the link count covers every extracted link, although the
 * message calls them "internal" — confirm which is intended. The `pages`
 * parameter is currently not read.
 *
 * @returns Findings keyed by route; routes without findings are omitted.
 */
function analyzePageIssues(
  pages: PageInfo[],
  contentByRoute: Record<string, string>
): Record<string, string[]> {
  const report: Record<string, string[]> = {}

  Object.entries(contentByRoute).forEach(([route, content]) => {
    const found: string[] = []
    const usesHead = (): boolean => /Head>/i.test(content)

    if (!(/title:/i.test(content) || usesHead())) {
      found.push("Missing title")
    }

    if (!(/description:/i.test(content) || usesHead())) {
      found.push("Missing description")
    }

    const extractedLinkTotal = extractLinksFromContent(content).length
    if (extractedLinkTotal > 10) {
      found.push(`Too many internal links (${extractedLinkTotal})`)
    }

    if (found.length > 0) {
      report[route] = found
    }
  })

  return report
}

/**
 * Generate sitemap XML from pages.
 *
 * Each page becomes one `<url>` entry. `<lastmod>` is the page's own
 * `lastModified` timestamp; the generation time is used only when that
 * value is missing or not a valid date. `<loc>` is XML-escaped so URLs
 * containing `&` or similar characters produce well-formed XML.
 *
 * @param pages - Pages to list, in output order.
 * @param baseUrl - Optional site origin (e.g. `https://example.com`) that is
 * prepended to every page URL; trailing slashes are ignored. Defaults to
 * `""`, which emits the page URLs unchanged.
 * @returns The sitemap document as a string.
 */
export function generateSitemapXml(pages: PageInfo[], baseUrl = ""): string {
  const escapeXml = (value: string): string =>
    value
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/"/g, "&quot;")
      .replace(/'/g, "&apos;")

  const origin = baseUrl.replace(/\/+$/, "")
  const generatedAt = new Date()

  const entries = pages.map((page) => {
    const modified = new Date(page.lastModified)
    // Fall back to the generation time rather than throwing on an invalid date.
    const lastmod = Number.isNaN(modified.getTime()) ? generatedAt : modified

    return (
      "  <url>\n" +
      `    <loc>${escapeXml(origin + page.url)}</loc>\n` +
      `    <lastmod>${lastmod.toISOString()}</lastmod>\n` +
      `    <changefreq>${page.changeFrequency}</changefreq>\n` +
      `    <priority>${page.priority.toFixed(1)}</priority>\n` +
      "  </url>\n"
    )
  })

  return (
    '<?xml version="1.0" encoding="UTF-8"?>\n' +
    '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n' +
    entries.join("") +
    "</urlset>"
  )
}