-- Retrieval schema for external knowledge ingestion

-- Extensions this schema depends on:
--   vector  : supplies the VECTOR column type, the ivfflat index method and
--             the vector_cosine_ops operator class used by retrieval.embeddings
--   pg_trgm : supplies the gin_trgm_ops operator class used on retrieval.chunks
CREATE EXTENSION IF NOT EXISTS vector;
CREATE EXTENSION IF NOT EXISTS pg_trgm;

-- Namespace that holds every table and index defined in this file.
CREATE SCHEMA IF NOT EXISTS retrieval;

-- One row per ingested item. retrieval.chunks.item_id references this table.
CREATE TABLE IF NOT EXISTS retrieval.items (
    id          BIGSERIAL   PRIMARY KEY,
    -- Presumably the identifier used by the upstream source (verify against
    -- the ingester). UNIQUE still allows any number of NULLs.
    external_id TEXT        UNIQUE,
    -- Closed set of item kinds; extend the CHECK list to add a new kind.
    kind        TEXT        NOT NULL
                CHECK (kind IN ('api_doc', 'code_symbol', 'snippet', 'note')),
    lang        TEXT,
    framework   TEXT,
    version     TEXT,
    -- Defaults to an empty object when omitted.
    -- NOTE(review): the column is nullable, so an explicit NULL is accepted.
    meta        JSONB       DEFAULT '{}'::jsonb,
    created_at  TIMESTAMPTZ NOT NULL DEFAULT now(),
    -- Only initialised at insert time here.
    -- NOTE(review): no trigger is visible in this file, so writers presumably
    -- have to set updated_at themselves on UPDATE; confirm.
    updated_at  TIMESTAMPTZ NOT NULL DEFAULT now()
);

-- Content chunks belonging to an item. retrieval.embeddings holds at most one
-- embedding row per chunk (its primary key is chunk_id).
CREATE TABLE IF NOT EXISTS retrieval.chunks (
    id           BIGSERIAL   PRIMARY KEY,
    -- Deleting the parent item deletes its chunks.
    -- NOTE(review): the column is nullable, so chunks without a parent item
    -- are accepted; confirm that is intended.
    item_id      BIGINT      REFERENCES retrieval.items (id) ON DELETE CASCADE,
    content      TEXT        NOT NULL,
    token_count  INT,
    symbol       TEXT,
    section_path TEXT,
    modality     TEXT        DEFAULT 'text',
    -- Unique among non-NULL values (enforced by retrieval_chunks_hash_idx).
    hash         TEXT,
    created_at   TIMESTAMPTZ NOT NULL DEFAULT now()
);

-- PostgreSQL does not automatically index the referencing column of a foreign
-- key. Without this index, the ON DELETE CASCADE from retrieval.items and any
-- lookup of chunks by item must scan the whole table.
CREATE INDEX IF NOT EXISTS retrieval_chunks_item_id_idx
    ON retrieval.chunks (item_id);

-- Vector embedding for a chunk. chunk_id is both the primary key and the
-- foreign key, so a chunk has at most one embedding, and the row is removed
-- when its chunk is deleted.
CREATE TABLE IF NOT EXISTS retrieval.embeddings (
    chunk_id   BIGINT      PRIMARY KEY
               REFERENCES retrieval.chunks (id) ON DELETE CASCADE,
    -- Fixed at 1024 dimensions; vectors of any other length are rejected.
    -- NOTE(review): nullable, so a row can exist without a vector.
    embedding  VECTOR(1024),
    created_at TIMESTAMPTZ NOT NULL DEFAULT now()
);

-- Enforces uniqueness of chunks.hash, considering only rows where hash is set.
-- Because this is a partial index, an upsert that targets it must repeat the
-- predicate: INSERT ... ON CONFLICT (hash) WHERE hash IS NOT NULL ...
CREATE UNIQUE INDEX IF NOT EXISTS retrieval_chunks_hash_idx
    ON retrieval.chunks (hash)
    WHERE hash IS NOT NULL;

-- Approximate nearest-neighbour index over embeddings, using the cosine
-- operator class (pgvector's cosine-distance operator is <=>).
-- NOTE(review): lists = 2048 is a fixed tuning value; pgvector's guidance
-- sizes lists from the row count, so confirm it matches the expected volume.
-- NOTE(review): IVFFlat picks its list centroids when the index is built, so
-- running this file against an empty table likely yields poor recall until
-- the index is rebuilt (REINDEX) after data has been loaded; verify.
CREATE INDEX IF NOT EXISTS retrieval_embeddings_ivf
    ON retrieval.embeddings
    USING ivfflat (embedding vector_cosine_ops)
    WITH (lists = 2048);

-- Trigram GIN index over chunk text. gin_trgm_ops comes from pg_trgm and lets
-- LIKE / ILIKE / regex and similarity predicates on content use the index.
CREATE INDEX IF NOT EXISTS retrieval_chunks_content_trgm
    ON retrieval.chunks
    USING gin (content gin_trgm_ops);