-- metal-kompanion/db/schema.sql
--
-- 106 lines
-- 4.0 KiB
-- SQL

-- Requires the pgvector (vector) and ltree extensions to be available on this server; both are enabled below with CREATE EXTENSION IF NOT EXISTS.
-- Namespace for the komp.* tables, indexes and views created below.
CREATE SCHEMA IF NOT EXISTS komp;
-- vector: supplies the VECTOR column type and the ivfflat index method used by the embedding tables.
CREATE EXTENSION IF NOT EXISTS vector;
-- ltree: supplies the LTREE column type used for komp.branch.path.
CREATE EXTENSION IF NOT EXISTS ltree;
-- Origin of ingested content. Rows in komp.chunk point back here through source_id.
CREATE TABLE IF NOT EXISTS komp.source (
    id         BIGSERIAL   PRIMARY KEY,
    -- Expected values: filesystem | repo | url | note (not enforced by a constraint).
    kind       TEXT        NOT NULL,
    -- Path or URL of the source.
    uri        TEXT        NOT NULL,
    -- NOTE(review): repo/ref look like a repository identifier and a revision; confirm with the ingester.
    repo       TEXT,
    ref        TEXT,
    -- Free-form metadata; defaults to an empty JSON object.
    meta       JSONB       DEFAULT '{}'::jsonb,
    created_at TIMESTAMPTZ DEFAULT now()
);
-- A piece of text belonging to a source. Deleting the source row deletes its chunks (ON DELETE CASCADE).
CREATE TABLE IF NOT EXISTS komp.chunk (
    id         BIGSERIAL   PRIMARY KEY,
    source_id  BIGINT      REFERENCES komp.source(id) ON DELETE CASCADE,
    -- NOTE(review): presumably the line number of the chunk within its source; confirm.
    lineno     INTEGER,
    text       TEXT        NOT NULL,
    -- NOTE(review): presumably a SHA-256 digest of text; uniqueness is not enforced here.
    sha256     TEXT        NOT NULL,
    tokens     INTEGER,
    created_at TIMESTAMPTZ DEFAULT now()
);

-- Lets lookups by source (including the cascade from komp.source) avoid a full scan.
CREATE INDEX IF NOT EXISTS idx_chunk_source
    ON komp.chunk (source_id);
-- A space is a separate memory with its own embedding model, vector dimension and policy.
CREATE TABLE IF NOT EXISTS komp.space (
    id     SERIAL  PRIMARY KEY,
    -- Example names: dev_knowledge | pattern_exchange | runtime_memory
    name   TEXT    UNIQUE,
    model  TEXT    NOT NULL,
    -- Vector dimension of the model; the embedding tables in this file exist for 768 and 1024.
    dim    INTEGER NOT NULL,
    -- Distance metric name; no constraint restricts the allowed values.
    metric TEXT    NOT NULL DEFAULT 'cosine'
);
-- Embedding tables, one per common vector dimension (add more as needed).
-- 768-dimensional embeddings: one row per chunk per space (UNIQUE applies to non-NULL ids).
CREATE TABLE IF NOT EXISTS komp.embedding_768 (
    id         BIGSERIAL   PRIMARY KEY,
    chunk_id   BIGINT      REFERENCES komp.chunk(id) ON DELETE CASCADE,
    space_id   INTEGER     REFERENCES komp.space(id) ON DELETE CASCADE,
    embedding  VECTOR(768) NOT NULL,
    created_at TIMESTAMPTZ DEFAULT now(),
    UNIQUE (chunk_id, space_id)
);

-- Filter embeddings by space.
CREATE INDEX IF NOT EXISTS idx_embed768_space
    ON komp.embedding_768 (space_id);

-- Approximate nearest-neighbour index for cosine distance.
-- NOTE(review): this runs at schema-creation time, typically on an empty table; pgvector advises
-- building ivfflat indexes once data is present, so consider a REINDEX after bulk ingestion.
CREATE INDEX IF NOT EXISTS ivf_embed768
    ON komp.embedding_768
    USING ivfflat (embedding vector_cosine_ops)
    WITH (lists = 100);
-- 1024-dimensional embeddings: one row per chunk per space (UNIQUE applies to non-NULL ids).
CREATE TABLE IF NOT EXISTS komp.embedding_1024 (
    id         BIGSERIAL    PRIMARY KEY,
    chunk_id   BIGINT       REFERENCES komp.chunk(id) ON DELETE CASCADE,
    space_id   INTEGER      REFERENCES komp.space(id) ON DELETE CASCADE,
    embedding  VECTOR(1024) NOT NULL,
    created_at TIMESTAMPTZ  DEFAULT now(),
    UNIQUE (chunk_id, space_id)
);

-- Filter embeddings by space.
CREATE INDEX IF NOT EXISTS idx_embed1024_space
    ON komp.embedding_1024 (space_id);

-- Approximate nearest-neighbour index for cosine distance.
-- NOTE(review): this runs at schema-creation time, typically on an empty table; pgvector advises
-- building ivfflat indexes once data is present, so consider a REINDEX after bulk ingestion.
CREATE INDEX IF NOT EXISTS ivf_embed1024
    ON komp.embedding_1024
    USING ivfflat (embedding vector_cosine_ops)
    WITH (lists = 100);
-- Branch hierarchy (Branch Embeddings): path encodes the position in the cluster tree,
-- e.g. physics.quantum.tunneling. A path is unique within its space.
CREATE TABLE IF NOT EXISTS komp.branch (
    id       BIGSERIAL PRIMARY KEY,
    space_id INTEGER   REFERENCES komp.space(id) ON DELETE CASCADE,
    path     LTREE     NOT NULL,
    label    TEXT,
    meta     JSONB     DEFAULT '{}'::jsonb,
    UNIQUE (space_id, path)
);

-- GiST index on the ltree path column.
CREATE INDEX IF NOT EXISTS gist_branch_path
    ON komp.branch
    USING GIST (path);
-- Centroids per dimension (store only the dimension matching the branch's space).
-- At most one 768-dimensional centroid per branch; removed together with the branch.
CREATE TABLE IF NOT EXISTS komp.branch_centroid_768 (
    branch_id  BIGINT      PRIMARY KEY
                           REFERENCES komp.branch(id) ON DELETE CASCADE,
    embedding  VECTOR(768) NOT NULL,
    updated_at TIMESTAMPTZ DEFAULT now()
);
-- At most one 1024-dimensional centroid per branch; removed together with the branch.
CREATE TABLE IF NOT EXISTS komp.branch_centroid_1024 (
    branch_id  BIGINT       PRIMARY KEY
                            REFERENCES komp.branch(id) ON DELETE CASCADE,
    embedding  VECTOR(1024) NOT NULL,
    updated_at TIMESTAMPTZ  DEFAULT now()
);
-- Soft membership of chunks to branches: a chunk may belong to several branches,
-- each with a weight in [0, 1]. Rows disappear when either side is deleted.
CREATE TABLE IF NOT EXISTS komp.chunk_branch (
    chunk_id  BIGINT REFERENCES komp.chunk(id) ON DELETE CASCADE,
    branch_id BIGINT REFERENCES komp.branch(id) ON DELETE CASCADE,
    weight    REAL   NOT NULL CHECK (weight >= 0 AND weight <= 1),
    PRIMARY KEY (chunk_id, branch_id)
);

-- The primary key only leads with chunk_id. Without this index, deleting a branch
-- (cascade) or listing the chunks of a branch has to scan the whole table.
CREATE INDEX IF NOT EXISTS idx_chunk_branch_branch
    ON komp.chunk_branch (branch_id);
-- Directed relations between chunks (similarity / cites / derives / contradicts / …).
-- One row per (source chunk, destination chunk, relation); removed when either chunk is deleted.
CREATE TABLE IF NOT EXISTS komp.chunk_edge (
    src_chunk_id BIGINT REFERENCES komp.chunk(id) ON DELETE CASCADE,
    dst_chunk_id BIGINT REFERENCES komp.chunk(id) ON DELETE CASCADE,
    relation     TEXT   NOT NULL,
    weight       REAL,
    meta         JSONB  DEFAULT '{}'::jsonb,
    PRIMARY KEY (src_chunk_id, dst_chunk_id, relation)
);

-- The primary key only leads with src_chunk_id. Without this index, deleting a chunk
-- (cascade via dst_chunk_id) or looking up incoming edges has to scan the whole table.
CREATE INDEX IF NOT EXISTS idx_chunk_edge_dst
    ON komp.chunk_edge (dst_chunk_id);
-- Every source together with the creation time of its most recent chunk.
-- last_chunk_at is NULL for sources that have no chunks (LEFT JOIN).
-- Columns are listed explicitly rather than via s.*: a column added to komp.source later
-- would otherwise land before last_chunk_at on re-run, and CREATE OR REPLACE VIEW rejects
-- a changed column order. New columns must be appended after last_chunk_at.
CREATE OR REPLACE VIEW komp.latest_sources AS
SELECT
    s.id,
    s.kind,
    s.uri,
    s.repo,
    s.ref,
    s.meta,
    s.created_at,
    max(c.created_at) AS last_chunk_at
FROM komp.source AS s
LEFT JOIN komp.chunk AS c
    ON c.source_id = s.id
-- Grouping by the primary key is enough for the remaining s.* columns in PostgreSQL.
GROUP BY s.id;