Add manuals knowledge retrieval and corpus tooling

2026-04-07 15:38:55 -06:00 · 2026-04-07 15:38:55 -06:00 · 087fda7ce6
commit 087fda7ce6
parent 96ad13d6a9
13 changed files with 4060 additions and 6 deletions
--- a/app/api/admin/manuals-knowledge/route.test.ts
+++ b/app/api/admin/manuals-knowledge/route.test.ts
@ -0,0 +1,48 @@
+import assert from "node:assert/strict"
+import test from "node:test"
+import { GET } from "@/app/api/admin/manuals-knowledge/route"
+
+const ORIGINAL_ADMIN_API_TOKEN = process.env.ADMIN_API_TOKEN
+
+// Put ADMIN_API_TOKEN back to its pre-test value (or unset it) after every test.
+test.afterEach(() => {
+  if (typeof ORIGINAL_ADMIN_API_TOKEN !== "string") {
+    delete process.env.ADMIN_API_TOKEN
+    return
+  }
+
+  process.env.ADMIN_API_TOKEN = ORIGINAL_ADMIN_API_TOKEN
+})
+
+test("manuals knowledge admin route requires admin auth", async () => {
+  process.env.ADMIN_API_TOKEN = "secret-token"
+
+  const response = await GET(
+    new Request("http://localhost/api/admin/manuals-knowledge?query=rvv+660")
+  )
+
+  assert.equal(response.status, 401)
+})
+
+test("manuals knowledge admin route returns retrieval details for authorized queries", async () => {
+  process.env.ADMIN_API_TOKEN = "secret-token"
+
+  const response = await GET(
+    new Request(
+      "http://localhost/api/admin/manuals-knowledge?query=RVV+660+service+manual",
+      {
+        headers: {
+          "x-admin-token": "secret-token",
+        },
+      }
+    )
+  )
+
+  assert.equal(response.status, 200)
+
+  const body = await response.json()
+
+  assert.equal(body.summary.ran, true)
+  assert.equal(Array.isArray(body.result.manualCandidates), true)
+  assert.equal(body.result.manualCandidates.length > 0, true)
+  assert.equal(Array.isArray(body.result.topChunks), true)
+  assert.equal(Array.isArray(body.summary.topChunkCitations), true)
+})
--- a/app/api/admin/manuals-knowledge/route.ts
+++ b/app/api/admin/manuals-knowledge/route.ts
@ -0,0 +1,79 @@
+import { NextResponse } from "next/server"
+import {
+  getManualCitationContext,
+  retrieveManualContext,
+  summarizeManualRetrieval,
+} from "@/lib/manuals-knowledge"
+import { requireAdminToken } from "@/lib/server/admin-auth"
+
+/**
+ * Trims a raw query-string value and caps it at 400 characters.
+ * A missing (`null`) parameter normalizes to the empty string.
+ */
+function normalizeQuery(value: string | null) {
+  return (value || "").trim().slice(0, 400)
+}
+
+/**
+ * Parses the optional `page` parameter as a non-negative base-10 integer.
+ *
+ * Only plain digit strings are accepted. Validating with `Number()` and then
+ * converting with `Number.parseInt()` lets the two disagree: "1e3" parses to 1,
+ * "0x10" parses to 0, and a whitespace-only value parses to NaN.
+ *
+ * @returns the page number, or `undefined` when the value is absent or invalid.
+ */
+function parsePageNumber(value: string | null): number | undefined {
+  const digits = (value ?? "").trim()
+  if (!/^\d+$/.test(digits)) {
+    return undefined
+  }
+
+  const page = Number.parseInt(digits, 10)
+  return Number.isSafeInteger(page) ? page : undefined
+}
+
+/**
+ * Admin-only inspection endpoint for manuals retrieval.
+ *
+ * Returns whatever error response `requireAdminToken` produces when auth
+ * fails, 400 when `query` is empty after normalization, and 500 with the
+ * error message when retrieval throws. On success the JSON body echoes the
+ * query and filters and includes the retrieval summary, the raw result, and
+ * citation context (or `null` when no manual id is available).
+ */
+export async function GET(request: Request) {
+  const authError = requireAdminToken(request)
+  if (authError) {
+    return authError
+  }
+
+  try {
+    const { searchParams } = new URL(request.url)
+    const query = normalizeQuery(searchParams.get("query"))
+    const manufacturer = normalizeQuery(searchParams.get("manufacturer")) || null
+    const model = normalizeQuery(searchParams.get("model")) || null
+    const manualId = normalizeQuery(searchParams.get("manualId")) || null
+    const pageNumber = parsePageNumber(searchParams.get("page"))
+
+    if (!query) {
+      return NextResponse.json(
+        { error: "A query parameter is required." },
+        { status: 400 }
+      )
+    }
+
+    const result = await retrieveManualContext(query, {
+      manufacturer,
+      model,
+      manualId,
+    })
+
+    // Prefer the explicitly requested manual; otherwise cite the best match.
+    const citationManualId = manualId || result.bestManual?.manualId
+    const citationContext = citationManualId
+      ? await getManualCitationContext(citationManualId, pageNumber)
+      : null
+
+    return NextResponse.json({
+      query,
+      filters: {
+        manufacturer,
+        model,
+        manualId,
+        pageNumber: pageNumber ?? null,
+      },
+      summary: summarizeManualRetrieval({
+        ran: true,
+        query,
+        result,
+      }),
+      result,
+      citationContext,
+    })
+  } catch (error) {
+    console.error("Failed to inspect manuals knowledge:", error)
+    return NextResponse.json(
+      {
+        error:
+          error instanceof Error
+            ? error.message
+            : "Failed to inspect manuals knowledge",
+      },
+      { status: 500 }
+    )
+  }
+}
--- a/app/api/chat/route.test.ts
+++ b/app/api/chat/route.test.ts
@ -0,0 +1,181 @@
+import assert from "node:assert/strict"
+import test from "node:test"
+import { NextRequest } from "next/server"
+import { POST } from "@/app/api/chat/route"
+
+type CapturedPayload = {
+  model: string
+  messages: Array<{ role: string; content: string }>
+}
+
+const ORIGINAL_FETCH = globalThis.fetch
+const ORIGINAL_XAI_KEY = process.env.XAI_API_KEY
+
+/**
+ * Builds the visitor fixture sent in the chat request body.
+ * Every field is fixed except `intent`, which the caller supplies.
+ */
+function buildVisitor(intent: string) {
+  return {
+    name: "Taylor",
+    phone: "(801) 555-1000",
+    email: "taylor@example.com",
+    intent,
+    serviceTextConsent: true,
+    marketingTextConsent: false,
+    consentVersion: "sms-consent-v1-2026-03-26",
+    consentCapturedAt: "2026-03-25T00:00:00.000Z",
+    consentSourcePage: "/contact-us",
+  }
+}
+
+/**
+ * Creates the JSON POST request handed to the chat route: a single user
+ * message from the `/manuals` page for a visitor with the given intent.
+ */
+function buildRequest(message: string, intent = "Manuals") {
+  const payload = {
+    pathname: "/manuals",
+    sessionId: "test-session",
+    visitor: buildVisitor(intent),
+    messages: [{ role: "user", content: message }],
+  }
+
+  return new NextRequest("http://localhost/api/chat", {
+    method: "POST",
+    headers: { "content-type": "application/json" },
+    body: JSON.stringify(payload),
+  })
+}
+
+/**
+ * Runs the chat POST handler with `globalThis.fetch` replaced by a stub and
+ * returns the handler response together with the JSON body that was sent to
+ * the stubbed fetch.
+ *
+ * Sets `XAI_API_KEY` and leaves the stub installed; the file's `afterEach`
+ * hook restores both. Fails the calling test if fetch was never invoked.
+ */
+async function runChatRouteWithSpy(
+  message: string,
+  intent = "Manuals"
+): Promise<{ response: Response; payload: CapturedPayload }> {
+  process.env.XAI_API_KEY = "test-xai-key"
+  let capturedPayload: CapturedPayload | null = null
+
+  // Canned completion body returned for every outbound fetch call.
+  const cannedCompletion = JSON.stringify({
+    choices: [{ message: { content: "Mock Jessica reply." } }],
+  })
+
+  const fetchStub = async (_input: RequestInfo | URL, init?: RequestInit) => {
+    // Remember the most recent request body so the test can inspect it.
+    capturedPayload = JSON.parse(String(init?.body || "{}")) as CapturedPayload
+
+    return new Response(cannedCompletion, {
+      status: 200,
+      headers: { "content-type": "application/json" },
+    })
+  }
+  globalThis.fetch = fetchStub as typeof fetch
+
+  const response = await POST(buildRequest(message, intent))
+
+  assert.ok(capturedPayload)
+  return { response, payload: capturedPayload }
+}
+
+// Undo the global fetch stub and the XAI_API_KEY override after every test.
+test.afterEach(() => {
+  globalThis.fetch = ORIGINAL_FETCH
+
+  if (typeof ORIGINAL_XAI_KEY !== "string") {
+    delete process.env.XAI_API_KEY
+    return
+  }
+
+  process.env.XAI_API_KEY = ORIGINAL_XAI_KEY
+})
+
+test("chat route includes grounded manual context for RVV alias lookups", async () => {
+  const { response, payload } = await runChatRouteWithSpy(
+    "RVV 660 service manual"
+  )
+
+  assert.equal(response.status, 200)
+  assert.equal(
+    payload.messages.some(
+      (message) =>
+        message.role === "system" &&
+        message.content.includes("Manual knowledge context:")
+    ),
+    true
+  )
+  assert.equal(
+    payload.messages.some(
+      (message) =>
+        message.role === "system" &&
+        /Royal Vendors|660/i.test(message.content)
+    ),
+    true
+  )
+})
+
+test("chat route resolves Narco alias lookups into manual context", async () => {
+  const { payload } = await runChatRouteWithSpy("Narco bevmax not cooling")
+
+  const manualContext = payload.messages.find(
+    (message) =>
+      message.role === "system" &&
+      message.content.includes("Manual knowledge context:")
+  )
+
+  assert.ok(manualContext)
+  assert.match(manualContext.content, /Dixie-Narco|Narco/i)
+})
+
+test("chat route low-confidence manual queries instruct Jessica to ask for brand model or photo", async () => {
+  const { payload } = await runChatRouteWithSpy(
+    "manual for flibbertigibbet machine"
+  )
+
+  const manualContext = payload.messages.find(
+    (message) =>
+      message.role === "system" &&
+      message.content.includes("Manual knowledge context:")
+  )
+
+  assert.ok(manualContext)
+  assert.match(
+    manualContext.content,
+    /brand on the front|model sticker|photo\/video/i
+  )
+})
+
+test("chat route risky technical manual queries inject conservative safety context", async () => {
+  const { payload } = await runChatRouteWithSpy(
+    "Royal wiring diagram voltage manual",
+    "Repairs"
+  )
+
+  const systemPrompt = payload.messages[0]?.content || ""
+  const manualContext = payload.messages.find(
+    (message) =>
+      message.role === "system" &&
+      message.content.includes("Manual knowledge context:")
+  )
+
+  assert.match(
+    systemPrompt,
+    /Do not provide step-by-step repair procedures, wiring guidance, voltage guidance/i
+  )
+  assert.ok(manualContext)
+  assert.match(manualContext.content, /technical or risky/i)
+})
+
+test("chat route skips manuals retrieval for non-manual conversations", async () => {
+  const { payload } = await runChatRouteWithSpy(
+    "Can someone call me back about free placement?",
+    "Free Placement"
+  )
+
+  const systemMessages = payload.messages.filter(
+    (message) => message.role === "system"
+  )
+
+  assert.equal(systemMessages.length, 1)
+  assert.equal(
+    systemMessages.some((message) =>
+      message.content.includes("Manual knowledge context:")
+    ),
+    false
+  )
+})
--- a/app/api/chat/route.ts
+++ b/app/api/chat/route.ts
@ -17,12 +17,18 @@ import {
  SITE_CHAT_TEMPERATURE,
  isSiteChatSuppressedRoute,
 } from "@/lib/site-chat/config"
-import { SITE_CHAT_SYSTEM_PROMPT } from "@/lib/site-chat/prompt"
+import { buildSiteChatSystemPrompt } from "@/lib/site-chat/prompt"
 import {
  consumeChatOutput,
  consumeChatRequest,
  getChatRateLimitStatus,
 } from "@/lib/site-chat/rate-limit"
+import {
+  formatManualContextForPrompt,
+  retrieveManualContext,
+  shouldUseManualKnowledgeForChat,
+  summarizeManualRetrieval,
+} from "@/lib/manuals-knowledge"
 import { createSmsConsentPayload } from "@/lib/sms-compliance"

 type ChatRole = "user" | "assistant"
@ -208,6 +214,15 @@ function extractAssistantText(data: any) {
  return ""
 }

+/**
+ * Builds the manuals lookup query from the conversation: the last three user
+ * messages, trimmed, with empty ones dropped, joined by single spaces.
+ */
+function buildManualKnowledgeQuery(messages: ChatMessage[]) {
+  const recentUserMessages = messages
+    .filter((message) => message.role === "user")
+    .slice(-3)
+
+  const parts: string[] = []
+  for (const { content } of recentUserMessages) {
+    const trimmed = content.trim()
+    if (trimmed) {
+      parts.push(trimmed)
+    }
+  }
+
+  return parts.join(" ")
+}
+
 export async function POST(request: NextRequest) {
  const responseHeaders: Record<string, string> = {
    "Cache-Control": "no-store",
@ -299,6 +314,36 @@ export async function POST(request: NextRequest) {
      sessionId,
    })

+    const manualKnowledgeQuery = buildManualKnowledgeQuery(messages)
+    const shouldUseManualKnowledge = shouldUseManualKnowledgeForChat(
+      visitor.intent,
+      manualKnowledgeQuery
+    )
+    let manualKnowledge = null
+    let manualKnowledgeError: unknown = null
+    if (shouldUseManualKnowledge) {
+      try {
+        manualKnowledge = await retrieveManualContext(manualKnowledgeQuery)
+      } catch (error) {
+        manualKnowledgeError = error
+        console.error("[site-chat] manuals knowledge lookup failed", {
+          pathname,
+          sessionId,
+          error,
+        })
+      }
+    }
+    console.info(
+      "[site-chat] manuals retrieval",
+      summarizeManualRetrieval({
+        ran: shouldUseManualKnowledge,
+        query: manualKnowledgeQuery,
+        result: manualKnowledge,
+        error: manualKnowledgeError,
+      })
+    )
+    const systemPrompt = buildSiteChatSystemPrompt()
+
    const xaiApiKey = getOptionalEnv("XAI_API_KEY")
    if (!xaiApiKey) {
      console.warn("[site-chat] missing XAI_API_KEY", {
@ -331,8 +376,18 @@ export async function POST(request: NextRequest) {
          messages: [
            {
              role: "system",
-              content: `${SITE_CHAT_SYSTEM_PROMPT}\n\nConversation context:\n- Current pathname: ${pathname}\n- Source: ${SITE_CHAT_SOURCE}\n- Visitor name: ${visitor.name}\n- Visitor email: ${visitor.email}\n- Visitor phone: ${visitor.phone}\n- Visitor intent: ${visitor.intent}\n- Service SMS consent: ${visitor.serviceTextConsent ? "yes" : "no"}\n- Marketing SMS consent: ${visitor.marketingTextConsent ? "yes" : "no"}`,
+              content: `${systemPrompt}\n\nConversation context:\n- Current pathname: ${pathname}\n- Source: ${SITE_CHAT_SOURCE}\n- Visitor name: ${visitor.name}\n- Visitor email: ${visitor.email}\n- Visitor phone: ${visitor.phone}\n- Visitor intent: ${visitor.intent}\n- Service SMS consent: ${visitor.serviceTextConsent ? "yes" : "no"}\n- Marketing SMS consent: ${visitor.marketingTextConsent ? "yes" : "no"}`,
            },
+            ...(shouldUseManualKnowledge
+              ? [
+                  {
+                    role: "system" as const,
+                    content: manualKnowledge
+                      ? formatManualContextForPrompt(manualKnowledge)
+                      : "Manual knowledge context:\n- A manual lookup was attempted, but no reliable manual context is available.\n- Do not guess. Ask for the brand, model sticker, or a clear photo/video that can be texted in.",
+                  },
+                ]
+              : []),
            ...messages,
          ],
        }),
--- a/docs/operations/JESSICA_MANUALS_KNOWLEDGE.md
+++ b/docs/operations/JESSICA_MANUALS_KNOWLEDGE.md
@ -0,0 +1,39 @@
+# Jessica Manuals Knowledge
+
+## What feeds the manuals knowledge layer
+- Primary source: tenant-filtered exports from the shared `manuals-platform` package.
+- Rocky consumes `manuals-platform/output/tenants/rocky-mountain-vending/manuals.json`.
+- Rocky consumes `manuals-platform/output/tenants/rocky-mountain-vending/chunks.json`.
+- If shared exports are missing in local development, the RMV app can still fall back to its in-repo builder.
+
+## How the corpus is built
+- The shared `manuals-platform` package scans the portfolio manuals tree, assigns tenant entitlements, and writes prebuilt artifacts.
+- RMV loads the Rocky tenant artifact on first use after process start.
+- Public Jessica retrieval is therefore consuming a tenant-filtered export rather than rebuilding the raw manuals corpus itself.
+
+## How new manuals become searchable
+- Add or update source PDFs under `manuals-data`.
+- Rebuild the shared package artifacts from `manuals-platform` so tenant exports are refreshed.
+- Restart the Next.js server or deployment so RMV reloads the updated tenant artifact on first use.
+
+## Cache refresh behavior
+- The shared package writes persistent JSON artifacts under `manuals-platform/output`.
+- RMV still caches the loaded Rocky tenant artifact in memory.
+- A cache reset helper (`resetManualKnowledgeCache`) is exported from `lib/manuals-knowledge.ts` for future admin tooling or deploy hooks.
+- Today, the simplest refresh flow is: rebuild shared artifacts, then restart the app.
+
+## Observability
+- The site chat route logs a metadata-only manuals retrieval summary before the xAI request.
+- The logs include whether retrieval ran, top manual candidate IDs, top chunk citations, clarification state, risk flag, and any retrieval error.
+- Full chunk text is not logged.
+
+## Internal debug surface
+- Internal endpoint: `GET /api/admin/manuals-knowledge`
+- Auth: send `ADMIN_API_TOKEN` either in the `x-admin-token` header or as `Authorization: Bearer <ADMIN_API_TOKEN>`.
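+- Query parameters:
+  - `query` (required), for example `query=RVV+660+service+manual`; a missing or blank `query` returns HTTP 400
+  - optional `manufacturer`
+  - optional `model`
+  - optional `manualId`
+  - optional `page` (page number used for the citation context)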
+- The endpoint returns retrieval summary, matched manuals, top chunks, and citation context for internal inspection only.
--- a/docs/operations/MANUALS_QDRANT_READINESS.md
+++ b/docs/operations/MANUALS_QDRANT_READINESS.md
@ -0,0 +1,40 @@
+# Manuals Qdrant Readiness
+
+## Purpose
+- The long-term source of truth for this pipeline is now the shared `manuals-platform` package at the workspace root.
+- The RMV repo keeps this document as a consumer-side reference for the tenant-filtered artifacts Rocky reads.
+
+## Source inputs
+- Shared package location: `../manuals-platform`
+- Shared build outputs: `../manuals-platform/output/full/*`
+- Rocky tenant outputs: `../manuals-platform/output/tenants/rocky-mountain-vending/*`
+
+## What the corpus builder does
+- The shared package scans the full portfolio manual set, classifies every PDF, assigns tenant entitlements, and publishes tenant-filtered Qdrant-ready artifacts.
+- It keeps `public_safe` and `internal_tech` retrieval profiles on top of one central corpus.
+- Rocky consumes the prebuilt Rocky tenant export instead of rebuilding from raw manuals data inside the app.
+
+## Build and evaluation commands
+- Build artifacts:
+  - `pnpm manuals:qdrant:build`
+- Build artifacts into a custom directory:
+  - `pnpm manuals:qdrant:build -- --output-dir /absolute/path`
+- Run the evaluation set:
+  - `pnpm manuals:qdrant:eval`
+
+## Artifact output
+- Default output directory: `output/manuals-qdrant`, resolved against the directory the build script is run from
+- Important files:
+  - `summary.json`
+  - `manuals.json`
+  - `chunks.json`
+  - `chunks-high-confidence.json`
+  - `chunks-public-safe.json`
+  - `chunks-internal-tech.json`
+  - `evaluation-cases.json`
+  - `evaluation-report.json`
+
+## Operational notes
+- The first Qdrant prototype should ingest `chunks-high-confidence.json` or `chunks-internal-tech.json`, not the full raw corpus.
+- Public-facing experiences should stay on `public_safe` filters even after Qdrant is introduced.
+- After manuals-data changes, rebuild the artifacts so the new normalized corpus and evaluation report stay in sync.
--- a/lib/manuals-knowledge.test.ts
+++ b/lib/manuals-knowledge.test.ts
@ -0,0 +1,79 @@
+import assert from "node:assert/strict"
+import test from "node:test"
+import {
+  findManualCandidates,
+  getManualCitationContext,
+  resetManualKnowledgeCache,
+  retrieveManualContext,
+  shouldUseManualKnowledgeForChat,
+} from "@/lib/manuals-knowledge"
+
+test("shouldUseManualKnowledgeForChat only triggers for relevant conversations", () => {
+  assert.equal(
+    shouldUseManualKnowledgeForChat(
+      "Repairs",
+      "My Royal machine is not accepting coins"
+    ),
+    true
+  )
+  assert.equal(shouldUseManualKnowledgeForChat("Other", "Hello there"), false)
+})
+
+test("findManualCandidates resolves RVV alias queries to Royal Vendors manuals", async () => {
+  const candidates = await findManualCandidates("RVV 660 service manual")
+
+  assert.ok(candidates.length > 0)
+  assert.equal(candidates[0]?.manufacturer, "Royal Vendors")
+  assert.match(candidates[0]?.filename || "", /660|700|gii|giii|rvv/i)
+})
+
+test("findManualCandidates resolves Narco-style queries to Dixie-Narco manuals", async () => {
+  const candidates = await findManualCandidates("Narco bevmax not cooling")
+
+  assert.ok(candidates.length > 0)
+  assert.equal(candidates[0]?.manufacturer, "Dixie-Narco")
+})
+
+test("retrieveManualContext returns grounded troubleshooting chunks for simple public help", async () => {
+  const result = await retrieveManualContext("Royal machine not accepting coins")
+
+  assert.ok(result.manualCandidates.length > 0)
+  assert.equal(result.topChunks.length > 0, true)
+  assert.equal(result.topChunks[0]?.manufacturer, "Royal Vendors")
+  assert.match(result.topChunks[0]?.text || "", /not accepting coins/i)
+  assert.equal(result.isRisky, false)
+})
+
+test("getManualCitationContext returns citations for a retrieved manual page", async () => {
+  const result = await retrieveManualContext("Royal machine not accepting coins")
+  const firstChunk = result.topChunks[0]
+
+  assert.ok(firstChunk)
+
+  const citationContext = await getManualCitationContext(
+    firstChunk.manualId,
+    firstChunk.pageNumber || undefined
+  )
+
+  assert.ok(citationContext.manual)
+  assert.ok(citationContext.citations.length > 0)
+  assert.equal(
+    citationContext.citations.some(
+      (citation) => citation.pageNumber === firstChunk.pageNumber
+    ),
+    true
+  )
+})
+
+test("resetManualKnowledgeCache rebuilds the manuals corpus on demand", async () => {
+  const beforeReset = await findManualCandidates("RVV 660 service manual")
+
+  resetManualKnowledgeCache()
+
+  const afterReset = await findManualCandidates("RVV 660 service manual")
+
+  assert.ok(beforeReset.length > 0)
+  assert.ok(afterReset.length > 0)
+  assert.equal(beforeReset[0]?.manufacturer, afterReset[0]?.manufacturer)
+  assert.equal(beforeReset[0]?.manualId, afterReset[0]?.manualId)
+})
--- a/lib/manuals-knowledge.ts
+++ b/lib/manuals-knowledge.ts
--- a/lib/manuals-qdrant-corpus.test.ts
+++ b/lib/manuals-qdrant-corpus.test.ts
@ -0,0 +1,114 @@
+import assert from "node:assert/strict"
+import test from "node:test"
+import {
+  evaluateManualsQdrantCorpus,
+  getDefaultManualsQdrantEvaluationCases,
+  getManualsQdrantCorpus,
+  resetManualsQdrantCorpusCache,
+  searchManualsQdrantCorpus,
+} from "@/lib/manuals-qdrant-corpus"
+
+// Start loading the corpus once at import time; shared by the tests below
+// that await `corpusPromise`.
+const corpusPromise = getManualsQdrantCorpus()
+
+// Reset the corpus cache once every test in this file has finished.
+test.after(() => {
+  resetManualsQdrantCorpusCache()
+})
+
+test("manuals qdrant corpus builds from the full structured and extracted datasets", async () => {
+  const corpus = await corpusPromise
+
+  // NOTE(review): 497 and the 20000 floor look pinned to the current dataset
+  // size — presumably these need updating whenever manuals-data changes; confirm.
+  assert.equal(corpus.stats.structuredRecords, 497)
+  assert.equal(corpus.stats.extractedRecords, 497)
+  assert.equal(corpus.stats.chunkCount > 20000, true)
+  assert.equal(corpus.stats.highConfidenceChunks > corpus.stats.fallbackChunks, true)
+  // No manual may end up with the placeholder id.
+  assert.equal(corpus.manuals.some((manual) => manual.manualId === "unknown-unknown-manual"), false)
+})
+
+test("canonical manufacturers cover core vending families after normalization", async () => {
+  const corpus = await corpusPromise
+
+  const manufacturers = new Set(corpus.manuals.map((manual) => manual.manufacturer))
+
+  assert.equal(manufacturers.has("Royal Vendors"), true)
+  assert.equal(manufacturers.has("Dixie-Narco"), true)
+  assert.equal(manufacturers.has("Crane"), true)
+  assert.equal(manufacturers.has("AP"), true)
+  assert.equal(manufacturers.has("Coinco"), true)
+  assert.equal(manufacturers.has("Other"), true)
+})
+
+test("fault queries prefer troubleshooting over brochure content", async () => {
+  const corpus = await corpusPromise
+  const results = searchManualsQdrantCorpus(
+    corpus,
+    "Royal machine not accepting coins",
+    {
+      profile: "public_safe",
+      limit: 5,
+    }
+  )
+
+  assert.equal(results.length > 0, true)
+  assert.equal(results[0]?.chunk.labels.includes("troubleshooting"), true)
+  assert.equal(results[0]?.chunk.labels.includes("brochure"), false)
+  assert.equal(results.some((result) => result.chunk.labels.includes("brochure")), false)
+})
+
+test("public-safe profile filters risky wiring chunks while internal-tech keeps them available", async () => {
+  const corpus = await corpusPromise
+  const publicResults = searchManualsQdrantCorpus(
+    corpus,
+    "Royal wiring diagram voltage issue",
+    {
+      profile: "public_safe",
+      limit: 5,
+    }
+  )
+  const internalResults = searchManualsQdrantCorpus(
+    corpus,
+    "Royal wiring diagram voltage issue",
+    {
+      profile: "internal_tech",
+      limit: 5,
+    }
+  )
+
+  assert.equal(
+    publicResults.some((result) => result.chunk.labels.includes("wiring")),
+    false
+  )
+  assert.equal(
+    internalResults.some((result) => result.chunk.labels.includes("wiring")),
+    true
+  )
+})
+
+test("default evaluation set passes before the corpus is treated as production-ready", async () => {
+  const corpus = await corpusPromise
+  const evaluation = evaluateManualsQdrantCorpus(
+    corpus,
+    getDefaultManualsQdrantEvaluationCases()
+  )
+
+  assert.equal(evaluation.summary.totalCases, 6)
+  assert.equal(
+    evaluation.cases.every(
+      (entry) =>
+        entry.passedTop3Manufacturer !== false &&
+        entry.passedTop5Label &&
+        entry.passedDisallowedCheck
+    ),
+    true
+  )
+})
+
+test("manuals qdrant corpus cache can be rebuilt on demand", async () => {
+  const firstCorpus = await getManualsQdrantCorpus()
+
+  resetManualsQdrantCorpusCache()
+
+  const secondCorpus = await getManualsQdrantCorpus()
+
+  assert.notEqual(firstCorpus, secondCorpus)
+  assert.equal(secondCorpus.stats.structuredRecords, 497)
+})
--- a/lib/manuals-qdrant-corpus.ts
+++ b/lib/manuals-qdrant-corpus.ts
--- a/lib/site-chat/prompt.ts
+++ b/lib/site-chat/prompt.ts
@ -2,9 +2,9 @@ import { businessConfig, serviceAreas } from "@/lib/seo-config"

 const SERVICE_AREA_LIST = serviceAreas.map((area) => area.city).join(", ")

-export const SITE_CHAT_SYSTEM_PROMPT = `You are Jessica, a super friendly and casual text-chat assistant for ${businessConfig.legalName} in Utah. Sound like a chill local friend who is genuinely trying to help. Use warm, natural phrases like "Hey," "Gotcha," "No worries," "That helps a ton," and "Just curious," when they fit. Never sound robotic, salesy, or overly formal.
+const SITE_CHAT_SYSTEM_PROMPT_BASE = `You are Jessica, a super friendly and casual text-chat assistant for ${businessConfig.legalName} in Utah. Sound like a chill local friend who is genuinely trying to help. Use warm, natural phrases like "Hey," "Gotcha," "No worries," "That helps a ton," and "Just curious," when they fit. Never sound robotic, salesy, or overly formal.

-Use this exact knowledge base and do not go beyond it:
+Use only the knowledge provided in this system prompt plus any manual knowledge context supplied later in the conversation. Do not go beyond that information.
 - Free vending placement is only for qualifying businesses. Rocky Mountain Vending installs, stocks, maintains, and repairs those machines at no cost to the business.
 - Repairs and maintenance are for machines the customer owns.
 - Moving requests can be for a vending machine or a safe, and they follow the same intake flow as repairs.
@ -22,12 +22,23 @@ Conversation rules:
 - For repairs or moving, start by asking what the machine looks like, what brand is on the front, or what they already know. If the move is involved, clarify whether it is for a vending machine or a safe. Later, direct them to text photos or videos to ${businessConfig.publicSmsNumber} or use the contact form so the team can diagnose remotely first.
 - For free placement, first confirm it is for a business. Then ask about the business type, then the approximate number of people, then the location over separate turns.
 - For sales, first ask what kind of machine or features they are thinking about. Ask about new or used and budget later, not all at once.
- For manuals or parts, ask what they remember about the machine or part instead of only asking for a model number.
+- For manuals, parts, or troubleshooting, ask what they remember about the machine or part instead of only asking for a model number.
+- When manual knowledge context is present, use only that retrieved context for manuals, parts, and troubleshooting replies.
+- For manuals, parts, or troubleshooting, stay limited to easy identification, likely issue category, and basic safe checks pulled from the retrieved context.
+- Cite the manual naturally when useful, like mentioning the manual name and page number in plain language.
+- If manual context is missing or low-confidence, do not guess. Ask for the brand, model sticker, or a clear photo/video that they can text to ${businessConfig.publicSmsNumber}.
+- Do not provide step-by-step repair procedures, wiring guidance, voltage guidance, bypasses, or risky technical instructions.
 - If the visitor asks about a place that appears on the current website, treat it as inside the service area unless a human needs to confirm edge-case coverage.

 Safety rules:
 - Never mention, quote, or hint at prices, service call fees, repair rates, hourly rates, parts costs, or internal policies.
- If the visitor asks about pricing or cost, say: "Our complete vending service, including installation, stocking, and maintenance, is provided at no cost to qualifying businesses. I can get a few details so our team can schedule a quick call with you." 
+- If the visitor asks about pricing or cost, say: "Our complete vending service, including installation, stocking, and maintenance, is provided at no cost to qualifying businesses. I can get a few details so our team can schedule a quick call with you."
 - Do not invent timelines, guarantees, inventory, contract terms, or legal details.
 - If something needs confirmation, say a team member can confirm it.
 `
+
+/**
+ * Returns the system prompt for the site chat.
+ * Currently takes no arguments and returns `SITE_CHAT_SYSTEM_PROMPT_BASE` unchanged.
+ */
+export function buildSiteChatSystemPrompt() {
+  return SITE_CHAT_SYSTEM_PROMPT_BASE
+}
+
+// Constant form of the prompt, evaluated once at module load.
+// NOTE(review): presumably kept for callers that still import the constant — confirm.
+export const SITE_CHAT_SYSTEM_PROMPT = buildSiteChatSystemPrompt()
--- a/scripts/build-manuals-qdrant-corpus.ts
+++ b/scripts/build-manuals-qdrant-corpus.ts
@ -0,0 +1,37 @@
+import { join } from "node:path"
+import { parseArgs } from "node:util"
+import { writeManualsQdrantArtifacts } from "@/lib/manuals-qdrant-corpus"
+
+// Parse CLI flags; the only recognized option is `--output-dir <path>`.
+const { values } = parseArgs({
+  args: process.argv.slice(2),
+  options: {
+    "output-dir": {
+      type: "string",
+    },
+  },
+})
+
+// Used when `--output-dir` is not supplied: <cwd>/output/manuals-qdrant.
+const defaultOutputDir = join(process.cwd(), "output", "manuals-qdrant")
+
+/**
+ * Writes the manuals Qdrant artifacts to the chosen output directory and
+ * prints a JSON summary of the run to stdout.
+ */
+async function main() {
+  const outputDir = values["output-dir"] || defaultOutputDir
+  const result = await writeManualsQdrantArtifacts({ outputDir })
+  const { corpus, evaluation } = result
+
+  console.log(
+    JSON.stringify(
+      {
+        outputDir: result.outputDir,
+        manuals: corpus.manuals.length,
+        chunks: corpus.chunks.length,
+        highConfidenceChunks: corpus.stats.highConfidenceChunks,
+        fallbackChunks: corpus.stats.fallbackChunks,
+        excludedChunks: corpus.stats.excludedChunks,
+        evaluation: evaluation.summary,
+      },
+      null,
+      2
+    )
+  )
+}
+
+// Report any failure and mark the process as failed without exiting abruptly.
+main().catch((err) => {
+  console.error(err)
+  process.exitCode = 1
+})
--- a/scripts/evaluate-manuals-qdrant-corpus.ts
+++ b/scripts/evaluate-manuals-qdrant-corpus.ts
@ -0,0 +1,33 @@
+import {
+  buildManualsQdrantCorpus,
+  evaluateManualsQdrantCorpus,
+} from "@/lib/manuals-qdrant-corpus"
+
+/**
+ * Builds the manuals Qdrant corpus, evaluates it, and prints a JSON report
+ * containing the evaluation summary and the cases that did not pass.
+ */
+async function main() {
+  const corpus = await buildManualsQdrantCorpus()
+  const { cases, summary } = evaluateManualsQdrantCorpus(corpus)
+
+  // The manufacturer check only counts as a failure when it is exactly `false`.
+  const failingCases = cases.filter((entry) => {
+    const passed =
+      entry.passedTop3Manufacturer !== false &&
+      entry.passedTop5Label &&
+      entry.passedDisallowedCheck
+    return !passed
+  })
+
+  const report = {
+    generatedAt: corpus.generatedAt,
+    summary,
+    failingCases,
+  }
+
+  console.log(JSON.stringify(report, null, 2))
+}
+
+// Report any failure and mark the process as failed without exiting abruptly.
+main().catch((err) => {
+  console.error(err)
+  process.exitCode = 1
+})