-- metal-kompanion/ingest/db/schema.sql
--
-- 47 lines
-- 1.5 KiB
-- SQL

-- Retrieval schema for external knowledge ingestion
--
-- Prerequisites: the extensions and schema below must exist before the tables
-- and indexes that follow. Each statement uses IF NOT EXISTS, so re-running
-- the script does not fail on objects that are already present.
--
-- Supplies the VECTOR column type plus the ivfflat access method and
-- vector_cosine_ops operator class used by retrieval.embeddings.
CREATE EXTENSION IF NOT EXISTS vector;
-- Supplies gin_trgm_ops, used by the trigram index on retrieval.chunks.content.
CREATE EXTENSION IF NOT EXISTS pg_trgm;
-- Namespace holding the retrieval tables defined in this file.
CREATE SCHEMA IF NOT EXISTS retrieval;
-- retrieval.items: catalog of ingested items (API docs, code symbols,
-- snippets, notes). Chunks hang off these rows via retrieval.chunks.item_id.
CREATE TABLE IF NOT EXISTS retrieval.items (
    id          BIGSERIAL   PRIMARY KEY,
    -- Optional external identifier; nullable, but unique whenever it is set.
    external_id TEXT        UNIQUE,
    -- Item category; required, and limited to the four values listed here.
    kind        TEXT        NOT NULL
                CHECK (kind IN ('api_doc','code_symbol','snippet','note')),
    lang        TEXT,
    framework   TEXT,
    version     TEXT,
    -- JSON attributes; an empty object is stored when the column is omitted.
    meta        JSONB       DEFAULT '{}'::jsonb,
    created_at  TIMESTAMPTZ NOT NULL DEFAULT now(),
    -- Initialised to now() on insert; nothing in this table definition
    -- refreshes it on later updates.
    updated_at  TIMESTAMPTZ NOT NULL DEFAULT now()
);
-- retrieval.chunks: text fragments belonging to an item in retrieval.items.
CREATE TABLE IF NOT EXISTS retrieval.chunks (
    id           BIGSERIAL   PRIMARY KEY,
    -- Parent item. Deleting the item deletes its chunks (ON DELETE CASCADE).
    -- The column is nullable as declared, so a chunk may exist without a parent.
    item_id      BIGINT      REFERENCES retrieval.items(id) ON DELETE CASCADE,
    -- The chunk text itself; the only required payload column.
    content      TEXT        NOT NULL,
    token_count  INT,
    symbol       TEXT,
    section_path TEXT,
    -- Defaults to 'text' when omitted.
    modality     TEXT        DEFAULT 'text',
    -- Optional; uniqueness of non-NULL values is enforced by
    -- retrieval_chunks_hash_idx. Presumably a digest of content -- TODO confirm
    -- against the ingest code.
    hash         TEXT,
    created_at   TIMESTAMPTZ NOT NULL DEFAULT now()
);
-- PostgreSQL does not automatically index the referencing side of a foreign
-- key. Without this index, every cascaded delete from retrieval.items, and
-- every lookup of chunks by item, has to scan the whole chunks table.
CREATE INDEX IF NOT EXISTS retrieval_chunks_item_id_idx
    ON retrieval.chunks (item_id);
-- retrieval.embeddings: at most one vector per chunk, since chunk_id is the
-- primary key.
CREATE TABLE IF NOT EXISTS retrieval.embeddings (
    -- Both primary key and foreign key; the row is removed with its chunk.
    chunk_id   BIGINT       PRIMARY KEY
               REFERENCES retrieval.chunks(id) ON DELETE CASCADE,
    -- Fixed-width 1024-dimension vector; nullable as declared.
    embedding  VECTOR(1024),
    created_at TIMESTAMPTZ  NOT NULL DEFAULT now()
);
-- Enforce that no two chunks share a hash. Rows whose hash is NULL are left
-- out of the index entirely, so any number of them may coexist.
CREATE UNIQUE INDEX IF NOT EXISTS retrieval_chunks_hash_idx
    ON retrieval.chunks (hash)
    WHERE hash IS NOT NULL;
-- IVFFlat index over embeddings using the cosine-distance operator class,
-- partitioned into 2048 lists.
-- NOTE(review): pgvector's guidance is to build IVFFlat indexes once the table
-- already holds data and to size `lists` from the row count; confirm that
-- creating it here (possibly on an empty table) with 2048 lists is intended.
CREATE INDEX IF NOT EXISTS retrieval_embeddings_ivf
    ON retrieval.embeddings
    USING ivfflat (embedding vector_cosine_ops)
    WITH (lists = 2048);
-- Trigram GIN index over chunk text; the gin_trgm_ops operator class is
-- provided by the pg_trgm extension.
CREATE INDEX IF NOT EXISTS retrieval_chunks_content_trgm
    ON retrieval.chunks
    USING gin (content gin_trgm_ops);