import { z } from 'zod';

/**
 * Maximum text age considered valid for classification.
 * NOTE(review): not referenced in this file; unit is presumably milliseconds — confirm against callers.
 */
export const TEXT_AGE_VALID_FOR_CLASSIFICATION = 700;

// ---- Transcript id bookkeeping (mutated by parseMessage) ----

/** Per-speaker counter, keyed by speaker name; bumped each time a final transcript for that speaker is parsed. */
const idCounterPerSpeaker: Record<string, number> = {};
/** `Date.now()` value captured the first time each transcript id was seen, keyed by transcript id. */
const idToFirstTimestamp: Record<string, number> = {};

// ---- Transcript delay estimation (mutated by parseMessage) ----

/**
 * Estimated wall-clock time (ms) that corresponds to stream time 0.
 * Stays `undefined` until the first message containing at least one word is parsed.
 */
let zeroReferenceTimestamp: number | undefined;
/** Maximum number of delay samples kept in `avgDelayBuffer`. */
const delayBufferWindowSize = 10;
/** Most recent estimated delay samples (ms), oldest first. */
const avgDelayBuffer: number[] = [];

// TODO move to another file
/** Runtime schema of one incoming transcript message; built once instead of on every call. */
const transcriptMessageSchema = z.object({
    transcript: z.object({
        speaker: z.string(),
        speaker_id: z.string().or(z.number()),
        words: z.array(
            z.object({
                start_time: z.number(),
                end_time: z.number(),
                text: z.string(),
            })
        ),
        is_final: z.boolean(),
    }),
});

/** A single word entry of a validated transcript message. */
type TranscriptWord = z.infer<typeof transcriptMessageSchema>['transcript']['words'][number];

/**
 * Feeds one delay sample (if the message has words) into the rolling window and
 * returns the current average delay in milliseconds.
 *
 * Word times are multiplied by 1000 before being combined with `Date.now()`,
 * i.e. they are treated as seconds relative to the start of the stream.
 *
 * @param words Words of the message being parsed; may be empty.
 * @returns The mean of the buffered delay samples, or `undefined` while no sample exists.
 */
function updateEstimatedTranscriptDelay(words: readonly TranscriptWord[]): number | undefined {
    const firstWord = words[0];
    const lastWord = words[words.length - 1];

    // The very first word ever received anchors stream time 0 to the wall clock.
    if (zeroReferenceTimestamp === undefined && firstWord) {
        zeroReferenceTimestamp = Date.now() - firstWord.start_time * 1000;
    }

    if (lastWord && zeroReferenceTimestamp !== undefined) {
        // Delay = now minus the wall-clock time at which the last word ended.
        const estimatedDelay = Date.now() - (lastWord.end_time * 1000 + zeroReferenceTimestamp);

        avgDelayBuffer.push(estimatedDelay);
        if (avgDelayBuffer.length > delayBufferWindowSize) {
            avgDelayBuffer.shift();
        }
    }

    if (avgDelayBuffer.length === 0) {
        return undefined;
    }
    return avgDelayBuffer.reduce((a, b) => a + b, 0) / avgDelayBuffer.length;
}

/**
 * Parses and validates one raw transcript message and converts it into the
 * internal transcript representation.
 *
 * Side effects on module state: updates the delay-estimation window, records the
 * first-seen time of the transcript id, and advances the speaker's counter when
 * the message is final (so the next message of that speaker gets a new id).
 *
 * A message with an empty `words` array is accepted and yields `words === ''`
 * and `rawWords === []`.
 *
 * @param msg_string JSON text of the message.
 * @returns The speaker, the space-joined text, per-word absolute timestamps (ms),
 *          the final flag, the transcript id, the first-seen timestamp of that id,
 *          and the rolling average transcript delay (ms) if any sample exists.
 * @throws SyntaxError if `msg_string` is not valid JSON.
 * @throws ZodError if the parsed JSON does not match the expected message shape.
 */
export function parseMessage(msg_string: string) {
    const { transcript } = transcriptMessageSchema.parse(JSON.parse(msg_string));

    const estimatedAverageTranscriptDelay = updateEstimatedTranscriptDelay(transcript.words);

    const speaker = transcript.speaker;
    const words = transcript.words.map((word) => word.text).join(' ');

    // The counter is keyed by speaker name while the id is prefixed with speaker_id.
    // NOTE(review): confirm that this mix of `speaker` and `speaker_id` is intentional.
    const transcriptIndex = idCounterPerSpeaker[speaker] ?? 0;
    const original_transcript_id = `${transcript.speaker_id}-${transcriptIndex}`;

    // Remember when this id was first seen; later (non-final/final) updates of the
    // same id reuse that moment as their time base.
    const firstWordTimestamp = idToFirstTimestamp[original_transcript_id] ?? Date.now();
    idToFirstTimestamp[original_transcript_id] = firstWordTimestamp;

    // A final message closes the current id: the next message of this speaker starts a new one.
    idCounterPerSpeaker[speaker] = transcript.is_final ? transcriptIndex + 1 : transcriptIndex;

    // With no words there is nothing to offset; fall back to 0 instead of
    // dereferencing a missing first word.
    const relativeFirstWordStartTime = (transcript.words[0]?.start_time ?? 0) * 1000;

    const rawWords = transcript.words.map((word) => ({
        text: word.text,
        startTimestamp: word.start_time * 1000 - relativeFirstWordStartTime + firstWordTimestamp,
        endTimestamp: word.end_time * 1000 - relativeFirstWordStartTime + firstWordTimestamp,
    }));

    return {
        speaker,
        words,
        rawWords,
        isFinal: transcript.is_final,
        original_transcript_id,
        firstWordTimestamp,
        estimatedAverageTranscriptDelay,
    };
}
