Rocky_Mountain_Vending/lib/voice-assistant/persistence.ts

266 lines
7.6 KiB
TypeScript

import { ConvexHttpClient } from "convex/browser"
import { makeFunctionReference } from "convex/server"
import {
EgressClient,
EncodedFileOutput,
EncodedFileType,
S3Upload,
} from "livekit-server-sdk"
import { VOICE_ASSISTANT_SOURCE } from "@/lib/voice-assistant/types"
type VoiceRecordingStatus = "pending" | "starting" | "recording" | "completed" | "failed"
type CreateVoiceSessionArgs = {
roomName: string
participantIdentity: string
siteUrl?: string
pathname?: string
pageUrl?: string
source?: string
metadata?: string
startedAt?: number
recordingDisclosureAt?: number
recordingStatus?: VoiceRecordingStatus
}
type AddTranscriptTurnArgs = {
sessionId: string
roomName: string
participantIdentity: string
role: "user" | "assistant" | "system"
text: string
kind?: string
isFinal?: boolean
language?: string
source?: string
createdAt?: number
}
type UpdateVoiceRecordingArgs = {
sessionId: string
recordingStatus?: VoiceRecordingStatus
recordingId?: string
recordingUrl?: string
recordingError?: string
}
type CompleteVoiceSessionArgs = UpdateVoiceRecordingArgs & {
endedAt?: number
}
type VoiceRecordingConfig = {
bucket: string
egressClient: EgressClient
endpoint: string
forcePathStyle: boolean
pathPrefix: string
publicBaseUrl?: string
region: string
secret: string
accessKey: string
}
type VoicePersistenceServices = {
convexClient: ConvexHttpClient | null
recording: VoiceRecordingConfig | null
}
const CREATE_VOICE_SESSION = makeFunctionReference<"mutation">("voiceSessions:createSession")
const ADD_VOICE_TRANSCRIPT_TURN = makeFunctionReference<"mutation">("voiceSessions:addTranscriptTurn")
const UPDATE_VOICE_RECORDING = makeFunctionReference<"mutation">("voiceSessions:updateRecording")
const COMPLETE_VOICE_SESSION = makeFunctionReference<"mutation">("voiceSessions:completeSession")
function readOptionalEnv(name: string) {
const value = process.env[name]
return typeof value === "string" && value.trim() ? value.trim() : ""
}
function readBooleanEnv(name: string) {
const value = readOptionalEnv(name).toLowerCase()
if (!value) {
return undefined
}
return value === "1" || value === "true" || value === "yes"
}
function sanitizePathSegment(value: string) {
return value
.replace(/[^a-zA-Z0-9/_-]+/g, "-")
.replace(/-+/g, "-")
.replace(/\/+/g, "/")
.replace(/^-|-$/g, "")
}
function buildRecordingFilepath(roomName: string) {
const prefix = readOptionalEnv("VOICE_RECORDING_PATH_PREFIX") || "livekit-recordings"
const date = new Date()
const year = date.getUTCFullYear()
const month = String(date.getUTCMonth() + 1).padStart(2, "0")
const day = String(date.getUTCDate()).padStart(2, "0")
const stamp = date.toISOString().replace(/[:.]/g, "-")
const safeRoom = sanitizePathSegment(roomName)
return `${sanitizePathSegment(prefix)}/${year}/${month}/${day}/${safeRoom}-${stamp}.mp3`
}
function buildRecordingUrl(publicBaseUrl: string | undefined, bucket: string, filepath: string) {
const encodedPath = filepath
.split("/")
.map((segment) => encodeURIComponent(segment))
.join("/")
if (publicBaseUrl) {
return `${publicBaseUrl.replace(/\/$/, "")}/${encodedPath}`
}
return `s3://${bucket}/${filepath}`
}
export function getVoicePersistenceServices(args: {
livekitUrl: string
livekitApiKey: string
livekitApiSecret: string
}) {
const convexUrl = readOptionalEnv("NEXT_PUBLIC_CONVEX_URL") || readOptionalEnv("CONVEX_URL")
const convexClient = convexUrl ? new ConvexHttpClient(convexUrl) : null
const accessKey =
readOptionalEnv("VOICE_RECORDING_ACCESS_KEY_ID") ||
readOptionalEnv("CLOUDFLARE_R2_ACCESS_KEY_ID") ||
readOptionalEnv("AWS_ACCESS_KEY_ID") ||
readOptionalEnv("AWS_ACCESS_KEY")
const secret =
readOptionalEnv("VOICE_RECORDING_SECRET_ACCESS_KEY") ||
readOptionalEnv("CLOUDFLARE_R2_SECRET_ACCESS_KEY") ||
readOptionalEnv("AWS_SECRET_ACCESS_KEY") ||
readOptionalEnv("AWS_SECRET_KEY")
const endpoint =
readOptionalEnv("VOICE_RECORDING_ENDPOINT") ||
readOptionalEnv("CLOUDFLARE_R2_ENDPOINT")
const bucket = readOptionalEnv("VOICE_RECORDING_BUCKET")
const region = readOptionalEnv("VOICE_RECORDING_REGION") || readOptionalEnv("AWS_DEFAULT_REGION") || "auto"
const publicBaseUrl = readOptionalEnv("VOICE_RECORDING_PUBLIC_BASE_URL") || undefined
const forcePathStyle = readBooleanEnv("VOICE_RECORDING_FORCE_PATH_STYLE") ?? true
const requested = readBooleanEnv("VOICE_RECORDING_ENABLED")
const hasRecordingEnv = Boolean(accessKey && secret && endpoint && bucket)
const recordingEnabled = requested ?? hasRecordingEnv
const recording =
recordingEnabled && hasRecordingEnv
? {
accessKey,
bucket,
egressClient: new EgressClient(args.livekitUrl, args.livekitApiKey, args.livekitApiSecret),
endpoint,
forcePathStyle,
pathPrefix: readOptionalEnv("VOICE_RECORDING_PATH_PREFIX") || "livekit-recordings",
publicBaseUrl,
region,
secret,
}
: null
return {
convexClient,
recording,
} satisfies VoicePersistenceServices
}
async function safeMutation<TArgs extends object>(
client: ConvexHttpClient | null,
reference: ReturnType<typeof makeFunctionReference<"mutation">>,
args: TArgs,
) {
if (!client) {
return null
}
return await client.mutation(reference, args)
}
export async function createVoiceSessionRecord(
services: VoicePersistenceServices,
args: CreateVoiceSessionArgs,
) {
return await safeMutation(services.convexClient, CREATE_VOICE_SESSION, {
...args,
source: args.source || VOICE_ASSISTANT_SOURCE,
})
}
export async function addVoiceTranscriptTurn(
services: VoicePersistenceServices,
args: AddTranscriptTurnArgs,
) {
const text = args.text.replace(/\s+/g, " ").trim()
if (!text) {
return null
}
return await safeMutation(services.convexClient, ADD_VOICE_TRANSCRIPT_TURN, {
...args,
source: args.source || VOICE_ASSISTANT_SOURCE,
text,
})
}
export async function updateVoiceRecording(
services: VoicePersistenceServices,
args: UpdateVoiceRecordingArgs,
) {
return await safeMutation(services.convexClient, UPDATE_VOICE_RECORDING, args)
}
export async function completeVoiceSession(
services: VoicePersistenceServices,
args: CompleteVoiceSessionArgs,
) {
return await safeMutation(services.convexClient, COMPLETE_VOICE_SESSION, args)
}
export async function startVoiceRecording(
services: VoicePersistenceServices,
roomName: string,
) {
if (!services.recording) {
return null
}
const filepath = buildRecordingFilepath(roomName)
const output = new EncodedFileOutput({
fileType: EncodedFileType.MP3,
filepath,
output: {
case: "s3",
value: new S3Upload({
accessKey: services.recording.accessKey,
secret: services.recording.secret,
region: services.recording.region,
endpoint: services.recording.endpoint,
bucket: services.recording.bucket,
forcePathStyle: services.recording.forcePathStyle,
}),
},
})
const info = await services.recording.egressClient.startRoomCompositeEgress(roomName, output, {
audioOnly: true,
})
return {
recordingId: info.egressId,
recordingStatus: "recording" as const,
recordingUrl: buildRecordingUrl(services.recording.publicBaseUrl, services.recording.bucket, filepath),
}
}
export async function stopVoiceRecording(
services: VoicePersistenceServices,
recordingId: string | null | undefined,
) {
if (!services.recording || !recordingId) {
return null
}
return await services.recording.egressClient.stopEgress(recordingId)
}