/*
 * Next.js website for Rocky Mountain Vending company featuring:
 *   - Product catalog with Stripe integration
 *   - Service areas and parts pages
 *   - Admin dashboard with Clerk authentication
 *   - SEO optimized pages with JSON-LD structured data
 *
 * Co-authored-by: Cursor <cursoragent@cursor.com>
 *
 * File listing metadata: 395 lines, 12 KiB, JavaScript.
 */
import fs from 'fs';
import path from 'path';
import { fileURLToPath, pathToFileURL } from 'url';

// ES modules do not provide __filename/__dirname, so derive them from the module URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Project root: two directory levels above the directory containing this script.
const PROJECT_ROOT = path.join(__dirname, '..', '..');

// The processed WordPress content is read from and written back to the same file.
const WORDPRESS_DATA_PATH = path.join(__dirname, '..', 'lib', 'wordpress-data', 'processed-content.json');
const CONFIG_PATH = path.join(__dirname, '..', 'lib', 'internal-links-config.json');
const OUTPUT_PATH = path.join(__dirname, '..', 'lib', 'wordpress-data', 'processed-content.json');

/**
 * Load the processed WordPress content from WORDPRESS_DATA_PATH.
 *
 * Best-effort: a read error, a JSON syntax error, or a top-level value that
 * is not a plain object is logged and an empty data set is returned instead
 * of throwing.
 *
 * @returns {object} The parsed data. `pages` is always an array, because
 *   main() reads `data.pages.length` unconditionally.
 */
function loadWordPressData() {
  try {
    const parsed = JSON.parse(fs.readFileSync(WORDPRESS_DATA_PATH, 'utf8'));

    if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
      throw new TypeError('expected a JSON object at the top level');
    }

    if (!Array.isArray(parsed.pages)) {
      console.error('Error loading WordPress data: "pages" is missing or not an array');
      return { ...parsed, pages: [] };
    }

    return parsed;
  } catch (error) {
    console.error('Error loading WordPress data:', error);
    return { pages: [], posts: [] };
  }
}

/**
 * Load the internal-links configuration from CONFIG_PATH.
 *
 * Best-effort: a read error, a JSON syntax error, or a top-level value that
 * is not a plain object is logged and an empty configuration is returned, so
 * callers can always read properties off the result.
 *
 * @returns {object} The parsed configuration object, or `{}` on failure.
 */
function loadConfig() {
  try {
    const parsed = JSON.parse(fs.readFileSync(CONFIG_PATH, 'utf8'));

    if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
      throw new TypeError('expected a JSON object at the top level');
    }

    return parsed;
  } catch (error) {
    console.error('Error loading config:', error);
    return {};
  }
}

/**
 * Resolve the site route for a page slug.
 *
 * @param {string} slug - Page slug to look up.
 * @param {object} routeMapping - Object whose keys are routes (without the
 *   leading slash) and whose values are page slugs.
 * @returns {string} `/<route>` for the first mapping entry whose value equals
 *   `slug`, otherwise `/<slug>`.
 */
function getRouteForSlug(slug, routeMapping) {
  const matchingEntry = Object.entries(routeMapping).find(
    ([, mappedSlug]) => mappedSlug === slug
  );

  if (matchingEntry === undefined) {
    // No explicit route: fall back to a slug-based route.
    return `/${slug}`;
  }

  const [route] = matchingEntry;
  return `/${route}`;
}

/**
 * Extract the text content of an HTML fragment.
 *
 * Steps: drop <script>/<style> blocks, replace every remaining tag with a
 * space, decode character references, normalise a few typographic characters
 * to ASCII, then collapse whitespace.
 *
 * @param {string} html - HTML fragment.
 * @returns {string} Plain text; `''` when `html` is empty or not a string.
 */
function extractTextFromHTML(html) {
  if (!html || typeof html !== 'string') return '';

  const namedEntities = new Map([
    ['nbsp', ' '],
    ['amp', '&'],
    ['lt', '<'],
    ['gt', '>'],
    ['quot', '"'],
    ['apos', "'"],
    ['lsquo', '\u2018'],
    ['rsquo', '\u2019'],
    ['ldquo', '\u201C'],
    ['rdquo', '\u201D'],
    ['ndash', '\u2013'],
    ['mdash', '\u2014'],
    ['hellip', '\u2026'],
  ]);

  // Remove script and style blocks together with their contents, then strip
  // the remaining tags but keep the text between them.
  let text = html
    .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '')
    .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '')
    .replace(/<[^>]+>/g, ' ');

  // Decode character references in ONE pass so the output of one replacement
  // is never decoded again (e.g. "&amp;lt;" must become "&lt;", not "<").
  text = text.replace(/&(#x[0-9a-f]+|#\d+|[a-z]+);/gi, (entity, body) => {
    if (body[0] === '#') {
      const isHex = body[1] === 'x' || body[1] === 'X';
      const codePoint = Number.parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
      // String.fromCodePoint throws outside the Unicode range; keep such
      // references verbatim instead.
      return codePoint > 0 && codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
    }
    return namedEntities.get(body) ?? entity;
  });

  // Normalise typographic punctuation to plain ASCII.
  text = text
    .replace(/\u2019/g, "'")
    .replace(/\u2013/g, '-')
    .replace(/\u2026/g, '...');

  // Clean up whitespace
  return text.replace(/\s+/g, ' ').trim();
}

/**
 * Check whether an opening <a> tag appears near a span of HTML.
 *
 * Looks at up to 100 characters before `startIndex` and up to 100 characters
 * after `endIndex`.
 *
 * @param {string} html - HTML being inspected.
 * @param {number} startIndex - Start offset of the span in `html`.
 * @param {number} endIndex - End offset (exclusive) of the span in `html`.
 * @returns {boolean} True when an opening anchor tag is found in that window.
 */
function hasExistingLink(html, startIndex, endIndex) {
  const before = html.substring(Math.max(0, startIndex - 100), startIndex);
  const after = html.substring(endIndex, Math.min(html.length, endIndex + 100));

  // Require whitespace or ">" right after "<a" so that other tags starting
  // with "a" (<abbr>, <article>, <aside>, ...) are not mistaken for links.
  const linkRegex = /<a(?:\s[^>]*)?>/i;
  return linkRegex.test(before + after);
}

/**
 * Check whether a position in the HTML lies inside an existing link.
 *
 * Counts opening and closing anchor tags in everything before `position`;
 * more opens than closes means an <a> element is still open there.
 *
 * @param {string} html - HTML being inspected.
 * @param {number} position - Offset into `html`.
 * @returns {boolean} True when an <a> element is open at `position`.
 */
function isInsideLink(html, position) {
  const before = html.substring(0, position);

  // "<a" must be followed by whitespace or ">" so that <abbr>, <article>,
  // <aside>, ... are not counted as anchors.
  const openTags = (before.match(/<a(?:\s[^>]*)?>/gi) || []).length;
  const closeTags = (before.match(/<\/a\s*>/gi) || []).length;

  return openTags > closeTags;
}

/**
 * Find whole-word occurrences of the given keywords in a piece of text.
 *
 * @param {string} content - Text to search.
 * @param {string[]} keywords - Keywords to look for; they are matched
 *   literally (regex metacharacters are escaped) between word boundaries.
 * @param {boolean} [caseSensitive=false] - Match case exactly when true.
 * @returns {{keyword: string, index: number, length: number, text: string}[]}
 *   One entry per occurrence, sorted by `index`. Empty when `content` is not
 *   a string or `keywords` is null/undefined.
 */
function findKeywordMatches(content, keywords, caseSensitive = false) {
  if (typeof content !== 'string' || keywords == null) return [];

  const matches = [];
  const flags = caseSensitive ? 'g' : 'gi';

  keywords.forEach(keyword => {
    // An empty keyword would compile to /\b\b/g, whose zero-length matches
    // never advance; blank and non-string keywords are skipped altogether.
    if (typeof keyword !== 'string' || keyword.trim() === '') return;

    const escaped = keyword.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const regex = new RegExp(`\\b${escaped}\\b`, flags);

    for (const match of content.matchAll(regex)) {
      matches.push({
        keyword,
        index: match.index,
        length: match[0].length,
        text: match[0],
      });
    }
  });

  // Sort by index
  matches.sort((a, b) => a.index - b.index);
  return matches;
}

/**
 * Replace a span of HTML with an anchor element.
 *
 * @param {string} html - HTML to modify.
 * @param {number} startIndex - Start offset of the span being replaced.
 * @param {number} endIndex - End offset (exclusive) of the span being replaced.
 * @param {string} url - Value placed in the anchor's `href` attribute.
 * @param {string} anchorText - Text placed between the anchor tags.
 * @returns {string} `html` with the span swapped for
 *   `<a href="url">anchorText</a>`.
 */
function insertLink(html, startIndex, endIndex, url, anchorText) {
  const anchor = '<a href="'.concat(url, '">', anchorText, '</a>');
  const head = html.substring(0, startIndex);
  const tail = html.substring(endIndex);
  return head.concat(anchor, tail);
}

/**
 * Generate internal links for a page.
 *
 * Keyword occurrences in `page.content` are replaced by `<a>` elements that
 * point at other pages. Targets listed in `config.priorityLinks` are handled
 * first, then the single-keyword entries of `config.keywordMappings`.
 *
 * @param {object} page - Page to process; reads `slug` and `content`.
 * @param {object[]} allPages - All known pages; reads `slug` and `title`.
 * @param {object} config - Link configuration. Reads `excludedPages`,
 *   `linkDensity.maxLinksPerPage` (default 10),
 *   `linkDensity.minWordsBetweenLinks` (default 50), `priorityLinks` and
 *   `keywordMappings`.
 * @param {object} routeMapping - Route-to-slug map passed to getRouteForSlug.
 * @returns {object} Copy of `page` with a `linksAdded` count and, when the
 *   page was processed, the updated `content`.
 */
function generateLinksForPage(page, allPages, config, routeMapping) {
  if (!page.content || typeof page.content !== 'string') {
    return { ...page, linksAdded: 0 };
  }

  // Skip excluded pages
  if (config.excludedPages && config.excludedPages.includes(page.slug)) {
    return { ...page, linksAdded: 0 };
  }

  // `??` instead of `||` so that an explicit 0 in the config is honoured.
  const maxLinks = config.linkDensity?.maxLinksPerPage ?? 10;
  const minWordsBetween = config.linkDensity?.minWordsBetweenLinks ?? 50;
  const minCharsBetween = minWordsBetween * 5; // Rough estimate: 5 chars per word

  // Build page index for quick lookup. A Map is used so that slugs such as
  // "constructor" cannot resolve to inherited object properties.
  const pageIndex = new Map();
  for (const p of allPages) {
    pageIndex.set(p.slug, {
      slug: p.slug,
      title: p.title,
      url: getRouteForSlug(p.slug, routeMapping),
    });
  }

  let content = page.content;
  // Plain-text view of `content`; refreshed after every insertion so keyword
  // offsets are never computed against stale text.
  let textContent = extractTextFromHTML(content);
  let linksAdded = 0;
  // HTML offsets (in the current `content`) of the links inserted so far.
  let linkPositions = [];

  // True when `position` falls between a "<" and its closing ">", i.e. inside
  // tag markup such as an attribute value.
  const isInsideTagMarkup = position => {
    const before = content.substring(0, position);
    return before.lastIndexOf('<') > before.lastIndexOf('>');
  };

  // Try to link the first usable occurrence of any keyword. Returns true when
  // a link was inserted. All position checks are done in HTML coordinates.
  const tryPlaceLink = (keywords, url, anchorText) => {
    for (const match of findKeywordMatches(textContent, keywords)) {
      // Find the actual position in HTML (accounting for HTML tags)
      const htmlMatch = findKeywordInHTML(content, match.text, match.index, textContent);
      if (!htmlMatch) continue;

      // Never touch tag markup or text that is already linked.
      if (isInsideTagMarkup(htmlMatch.start) || isInsideLink(content, htmlMatch.start)) continue;

      // Check minimum distance from other links
      const tooClose = linkPositions.some(
        pos => Math.abs(pos - htmlMatch.start) < minCharsBetween
      );
      if (tooClose) continue;

      const updated = insertLink(content, htmlMatch.start, htmlMatch.end, url, anchorText);

      // Links located after the insertion point move by the size difference.
      const shift = updated.length - content.length;
      linkPositions = linkPositions.map(pos => (pos > htmlMatch.start ? pos + shift : pos));
      linkPositions.push(htmlMatch.start);

      content = updated;
      textContent = extractTextFromHTML(content);
      return true;
    }
    return false;
  };

  // Process priority links first
  if (config.priorityLinks) {
    for (const [targetSlug, linkConfig] of Object.entries(config.priorityLinks)) {
      if (linksAdded >= maxLinks) break;
      if (page.slug === targetSlug) continue; // Don't link to self

      const target = pageIndex.get(targetSlug);
      if (!target) continue;

      const keywords = Array.isArray(linkConfig?.keywords) ? linkConfig.keywords : [];
      const maxLinksForThis = linkConfig?.maxLinks ?? 2;

      // Fall back to the target title when no usable anchor text is configured;
      // an empty array would otherwise produce the link text "undefined".
      const configuredAnchors = linkConfig?.anchorText;
      let anchorTexts;
      if (Array.isArray(configuredAnchors) && configuredAnchors.length > 0) {
        anchorTexts = configuredAnchors;
      } else if (typeof configuredAnchors === 'string' && configuredAnchors !== '') {
        anchorTexts = [configuredAnchors];
      } else {
        anchorTexts = [target.title];
      }

      let linksAddedForThis = 0;
      while (linksAdded < maxLinks && linksAddedForThis < maxLinksForThis) {
        // Rotate through the configured anchor texts.
        const anchorText = anchorTexts[linksAddedForThis % anchorTexts.length];
        if (!tryPlaceLink(keywords, target.url, anchorText)) break;
        linksAdded++;
        linksAddedForThis++;
      }
    }
  }

  // Process keyword mappings
  if (config.keywordMappings) {
    for (const [keyword, targetSlug] of Object.entries(config.keywordMappings)) {
      if (linksAdded >= maxLinks) break;
      if (page.slug === targetSlug) continue;

      const target = pageIndex.get(targetSlug);
      if (!target) continue;

      // Only link first occurrence per keyword
      if (tryPlaceLink([keyword], target.url, target.title || keyword)) {
        linksAdded++;
      }
    }
  }

  return {
    ...page,
    content,
    linksAdded,
  };
}

/**
 * Find keyword position in HTML accounting for tags.
 *
 * The keyword is searched as a whole word (case-insensitive) in the raw HTML.
 * Occurrences inside tag markup or inside an existing <a> element are
 * skipped. Of the remaining ones, the first whose preceding word count is
 * within 10 words of the word count before `textIndex` in `textContent` is
 * returned.
 *
 * @param {string} html - HTML to search.
 * @param {string} keyword - Keyword text to locate.
 * @param {number} textIndex - Offset of the occurrence in `textContent`.
 * @param {string} textContent - Plain-text version of `html`.
 * @returns {{start: number, end: number}|null} HTML offsets of the match
 *   (`end` exclusive), or null when no suitable occurrence is found.
 */
function findKeywordInHTML(html, keyword, textIndex, textContent) {
  // An empty keyword would compile to /\b\b/g, whose zero-length matches never
  // advance the search and would loop forever.
  if (typeof html !== 'string' || typeof keyword !== 'string' || keyword.length === 0) {
    return null;
  }

  // Create a regex to find the keyword as a whole word, case-insensitive
  const keywordEscaped = keyword.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const regex = new RegExp(`\\b${keywordEscaped}\\b`, 'gi');

  const textBeforeMatch = textContent.substring(0, textIndex);
  const wordCountBefore = textBeforeMatch.split(/\s+/).length;

  let match;
  let matchCount = 0;

  while ((match = regex.exec(html)) !== null) {
    const matchStart = match.index;
    const matchEnd = matchStart + match[0].length;
    const htmlBeforeMatch = html.substring(0, matchStart);

    // Skip if inside an HTML tag: the nearest "<" before the match has not
    // been closed by a ">" yet (e.g. the keyword sits in an href value).
    if (htmlBeforeMatch.lastIndexOf('<') > htmlBeforeMatch.lastIndexOf('>')) {
      continue;
    }

    // Skip if inside an existing link. The whole prefix is inspected so long
    // links are detected, and "<a" must be followed by whitespace or ">" so
    // <abbr>/<article>/<aside> are not counted as anchors.
    const openLinks = (htmlBeforeMatch.match(/<a(?:\s[^>]*)?>/gi) || []).length;
    const closeLinks = (htmlBeforeMatch.match(/<\/a\s*>/gi) || []).length;
    if (openLinks > closeLinks) {
      continue;
    }

    // Count words before this match in HTML
    const textBefore = extractTextFromHTML(htmlBeforeMatch);
    const wordCount = textBefore.split(/\s+/).length;

    // If this match is close to our target word count, use it
    if (Math.abs(wordCount - wordCountBefore) < 10) {
      return {
        start: matchStart,
        end: matchEnd,
      };
    }

    matchCount++;
    // Limit search to first 20 matches
    if (matchCount > 20) break;
  }

  return null;
}

/**
 * Main function.
 *
 * Loads the WordPress data and the link configuration, runs
 * generateLinksForPage over every page, backs up the existing output file
 * (if any) and writes the updated data to OUTPUT_PATH.
 *
 * When there are no pages to process nothing is written, so a failed load
 * can never replace the data file with an empty page list.
 */
function main() {
  console.log('Loading WordPress data...');
  const data = loadWordPressData();

  console.log('Loading configuration...');
  const config = loadConfig();

  // Load route mapping from the page.tsx file (simplified version)
  const routeMapping = {
    'services/repairs': 'vending-machine-repairs',
    'services/moving': 'vending-machine-repairs',
    'services/parts': 'parts-and-support',
    'services': 'vending-machine-repairs',
    'vending-machines': 'vending-machines',
    'vending-machines/machines-we-use': 'vending-machines',
    'vending-machines/machines-for-sale': 'vending-machines-for-sale-in-utah',
    'warehouses': 'streamlining-snack-and-beverage-access-in-warehouse-environments',
    'auto-repair': 'enhancing-auto-repair-facilities-with-convenient-vending-solutions',
    'gyms': 'vending-machine-for-your-gym',
    'community-centers': 'vending-for-your-community-centers',
    'dance-studios': 'vending-machine-for-your-dance-studio',
    'car-washes': 'vending-machines-for-your-car-wash',
    'food-and-beverage/healthy-options': 'healthy-vending',
    'food-and-beverage/traditional-options': 'traditional-vending',
    'food-and-beverage/suppliers': 'diverse-vending-options-with-rocky-mountain-vendings-exclusive-wholesale-accounts',
    'about-us': 'about-us',
    'about/faqs': 'faqs',
  };

  const pages = Array.isArray(data?.pages) ? data.pages : [];
  if (pages.length === 0) {
    console.error('No pages to process; leaving the existing data file untouched.');
    return;
  }

  console.log(`Processing ${pages.length} pages...`);

  // Process every page exactly once, accumulating the total as we go.
  let totalLinks = 0;
  const updatedPages = [];
  for (const page of pages) {
    // `linksAdded` is bookkeeping only and must not end up in the output file.
    const { linksAdded, ...pageWithoutLinksAdded } = generateLinksForPage(
      page,
      pages,
      config,
      routeMapping
    );

    if (linksAdded > 0) {
      console.log(` ✓ ${page.slug}: Added ${linksAdded} link(s)`);
      totalLinks += linksAdded;
    }

    updatedPages.push(pageWithoutLinksAdded);
  }

  console.log(`\nTotal links added: ${totalLinks}`);

  const updatedData = {
    ...data,
    pages: updatedPages,
  };

  // Create backup if file exists
  if (fs.existsSync(OUTPUT_PATH)) {
    const backupPath = `${OUTPUT_PATH}.backup.${Date.now()}`;
    fs.copyFileSync(OUTPUT_PATH, backupPath);
    console.log(`\nBackup created: ${backupPath}`);
  } else {
    console.log('\nNo existing file to backup (creating new file)');
  }

  // Write updated file
  fs.writeFileSync(OUTPUT_PATH, JSON.stringify(updatedData, null, 2));
  console.log(`\nUpdated file written: ${OUTPUT_PATH}`);
  console.log('\nDone!');
}

// Run if called directly. The script path is converted with pathToFileURL so
// the comparison with import.meta.url also holds for paths that need
// percent-encoding (spaces, "#", ...) or that are Windows drive paths; the
// file-name check is kept as a fallback.
if (
  process.argv[1] &&
  (import.meta.url === pathToFileURL(process.argv[1]).href ||
    process.argv[1].endsWith('generate-internal-links.js'))
) {
  main();
}

export { main, generateLinksForPage, loadWordPressData, loadConfig };
|
|
|