# Source: metal-kompanion/ingest/pipeline.qt-kde-bge-m3.yaml
# (file-viewer listing header: 91 lines, 1.8 KiB, YAML)

---
# Ingest pipeline configuration for Qt and KDE Frameworks source trees.
#
# NOTE(review): the block nesting below was reconstructed from a copy whose
# leading whitespace had been stripped. Every section is assumed to live under
# `pipeline:` -- confirm against the loader's expected schema.
pipeline:
  name: qt_kde_bge_m3

  # Embedding service settings.
  embed:
    endpoint: "http://localhost:8080/embed"
    dim: 1024  # presumably the embedding vector width; verify against the model
    normalize: true
    batch_size: 64
    rate_limit_per_sec: 8

  # Source trees to ingest. Each entry carries its own include/exclude globs
  # and the framework/version labels attached to it.
  sources:
    - name: qtbase
      type: git
      root: /home/kompanion/src/qt/qtbase
      include:
        - "**/*.cpp"
        - "**/*.cc"
        - "**/*.cxx"
        - "**/*.h"
        - "**/*.hpp"
        - "**/*.qml"
        - "**/*.md"
        - "doc/**/*.qdoc"
      exclude:
        - "**/tests/**"
        - "**/3rdparty/**"
      framework: "Qt"
      version: "qtbase@HEAD"

    - name: kde-frameworks
      type: git
      root: /home/kompanion/src/kde/frameworks
      include:
        - "**/*.cpp"
        - "**/*.h"
        - "**/*.md"
        - "**/*.rst"
      exclude:
        - "**/autotests/**"
        - "**/build/**"
      framework: "KDE Frameworks"
      version: "kf6@HEAD"

  # Chunking rules, configured separately for documentation and for code.
  chunking:
    docs:
      max_tokens: 700
      overlap_tokens: 120
      split_on:
        - heading
        - code_fence
        - paragraph
    code:
      by: ctags
      include_doc_comment: true
      body_head_lines: 60
      signature_first: true
      attach_file_context: true

  # Metadata fields to compute. Entries have either a `when` selector
  # (code / docs) or a fixed `value`.
  metadata:
    compute:
      - name: symbol_list
        when: code
      - name: section_path
        when: docs
      - name: lang
        value: "en"
      - name: license_scan
        value: "auto|skipped"

  # Database target.
  # NOTE(review): the DSN embeds a username and password in plain text. If the
  # loader supports it, source these from the environment or a secret store
  # instead of committing them.
  db:
    dsn: "postgresql://kom:kom@localhost:5432/kom"
    schema: "retrieval"
    tables:
      items: "items"
      chunks: "chunks"
      embeddings: "embeddings"

  # Pilot evaluation queries and settings.
  quality:
    pilot_eval:
      queries:
        - "QVector erase idiom"
        - "How to connect Qt signal to lambda"
        - "KF CoreAddons KRandom example"
        - "QAbstractItemModel insertRows example"
      k: 20
      manual_check: true

  # Hybrid retrieval settings.
  hybrid:
    enable_bm25_trgm: true
    vector_k: 50
    merge_topk: 10