// Rocky_Mountain_Vending/lib/manuals-qdrant-corpus.test.ts

import assert from "node:assert/strict"
import test from "node:test"
import {
  evaluateManualsQdrantCorpus,
  getDefaultManualsQdrantEvaluationCases,
  getManualsQdrantCorpus,
  resetManualsQdrantCorpusCache,
  searchManualsQdrantCorpus,
} from "@/lib/manuals-qdrant-corpus"

// Request the corpus once at module load; most tests below await this same
// promise instead of calling getManualsQdrantCorpus() themselves.
const corpusPromise = getManualsQdrantCorpus()

// Once every test in this file has finished, reset the corpus cache.
// NOTE(review): presumably this keeps the cached corpus from outliving the
// test run — confirm against resetManualsQdrantCorpusCache.
test.after(() => {
  resetManualsQdrantCorpusCache()
})

// Verifies the corpus stats: exact structured/extracted record counts, a
// minimum chunk volume, more high-confidence chunks than fallback chunks, and
// that no manual carries the id "unknown-unknown-manual".
test("manuals qdrant corpus builds from the full structured and extracted datasets", async () => {
  const corpus = await corpusPromise
  const { stats } = corpus

  assert.equal(stats.structuredRecords, 497)
  assert.equal(stats.extractedRecords, 497)

  // Use assert.ok with a message so a failure reports the actual counts
  // instead of an opaque "false !== true".
  assert.ok(
    stats.chunkCount > 20000,
    `expected more than 20000 chunks, got ${stats.chunkCount}`
  )
  assert.ok(
    stats.highConfidenceChunks > stats.fallbackChunks,
    `expected high-confidence chunks (${stats.highConfidenceChunks}) to outnumber fallback chunks (${stats.fallbackChunks})`
  )
  assert.equal(
    corpus.manuals.some((manual) => manual.manualId === "unknown-unknown-manual"),
    false,
    'found a manual with the id "unknown-unknown-manual"'
  )
})

// Verifies that the manufacturer names found on the corpus manuals include
// every name in the expected list below.
test("canonical manufacturers cover core vending families after normalization", async () => {
  const corpus = await corpusPromise

  const manufacturers = new Set(corpus.manuals.map((manual) => manual.manufacturer))
  const expectedManufacturers = [
    "Royal Vendors",
    "Dixie-Narco",
    "Crane",
    "AP",
    "Coinco",
    "Other",
  ] as const

  // Collect every absent name so a failure lists all gaps at once rather than
  // stopping at the first missing manufacturer.
  const missingManufacturers = expectedManufacturers.filter(
    (name) => !manufacturers.has(name)
  )

  assert.deepEqual(missingManufacturers, [])
})

// A coin-acceptance fault query under the public_safe profile must return at
// least one hit, rank a "troubleshooting" chunk first, and return no chunk
// labelled "brochure" anywhere in the top five.
test("fault queries prefer troubleshooting over brochure content", async () => {
  const corpus = await corpusPromise
  const query = "Royal machine not accepting coins"
  const hits = searchManualsQdrantCorpus(corpus, query, {
    profile: "public_safe",
    limit: 5,
  })

  assert.strictEqual(hits.length > 0, true)

  const topHit = hits[0]
  assert.strictEqual(topHit?.chunk.labels.includes("troubleshooting"), true)
  assert.strictEqual(topHit?.chunk.labels.includes("brochure"), false)

  // No hit at any rank may be brochure content.
  assert.strictEqual(
    hits.every((hit) => !hit.chunk.labels.includes("brochure")),
    true
  )
})

// Runs the same wiring query under both profiles: public_safe must return no
// chunk labelled "wiring", while internal_tech must return at least one.
test("public-safe profile filters risky wiring chunks while internal-tech keeps them available", async () => {
  const corpus = await corpusPromise
  const query = "Royal wiring diagram voltage issue"

  const publicHits = searchManualsQdrantCorpus(corpus, query, {
    profile: "public_safe",
    limit: 5,
  })
  const internalHits = searchManualsQdrantCorpus(corpus, query, {
    profile: "internal_tech",
    limit: 5,
  })

  const isWiringHit = (hit: (typeof publicHits)[number]) =>
    hit.chunk.labels.includes("wiring")

  assert.strictEqual(publicHits.some(isWiringHit), false)
  assert.strictEqual(internalHits.some(isWiringHit), true)
})

// Runs the default evaluation cases against the corpus and requires all six
// to pass the manufacturer, label, and disallowed-content checks.
test("default evaluation set passes before the corpus is treated as production-ready", async () => {
  const corpus = await corpusPromise
  const evaluation = evaluateManualsQdrantCorpus(
    corpus,
    getDefaultManualsQdrantEvaluationCases()
  )

  assert.equal(evaluation.summary.totalCases, 6)

  // Gather the failing cases rather than reducing to a single boolean, so a
  // failure diff shows which entries did not pass. Only an explicit `false`
  // on passedTop3Manufacturer counts as a failure, as in the original check.
  // NOTE(review): other values presumably mean the manufacturer check does
  // not apply to that case — confirm against the evaluator.
  const failingCases = evaluation.cases.filter(
    (entry) =>
      entry.passedTop3Manufacturer === false ||
      !entry.passedTop5Label ||
      !entry.passedDisallowedCheck
  )

  assert.deepEqual(failingCases, [])
})

// Fetching the corpus, resetting the cache, and fetching again must yield a
// different object whose structured record count is still 497.
test("manuals qdrant corpus cache can be rebuilt on demand", async () => {
  const beforeReset = await getManualsQdrantCorpus()
  resetManualsQdrantCorpusCache()
  const afterReset = await getManualsQdrantCorpus()

  // Identity comparison: the rebuilt corpus must not be the same instance.
  assert.notStrictEqual(beforeReset, afterReset)
  assert.strictEqual(afterReset.stats.structuredRecords, 497)
})