metal-kompanion/src/cli/py_embedder.py

59 lines
2.0 KiB
Python

#!/usr/bin/env python3
"""
Lightweight embedding helper moved from ingest/ for transparency.
Usage examples:
- Single embedding via Ollama:
OLLAMA_BASE=http://localhost:11434 \
./py_embedder.py --model bge-m3:latest --text "hello world"
- Batch from stdin (one line per text):
./py_embedder.py --model bge-m3:latest --stdin < texts.txt
Outputs JSON array of floats (for single text) or array-of-arrays for batches.
This script does not touch the database; it only produces vectors.
"""
import os, sys, json, argparse, requests
def embed_ollama(texts, model, base, timeout=120):
    """Embed each text via an Ollama server and return the vectors in order.

    One POST is sent to ``{base}/api/embeddings`` per text, since some
    Ollama models accept only a single prompt per request.

    Args:
        texts: Strings to embed, processed in iteration order.
        model: Model name sent in the request's ``"model"`` field.
        base: Base URL of the Ollama server; trailing slashes are ignored.
        timeout: Per-request timeout in seconds passed to ``requests``.

    Returns:
        A list of embedding vectors. A response containing ``"embedding"``
        contributes one vector; a response containing ``"embeddings"``
        contributes every vector it lists. Empty input yields ``[]``.

    Raises:
        requests.HTTPError: If the server answers with an error status
            (raised by ``raise_for_status``).
        RuntimeError: If a response has neither ``"embedding"`` nor
            ``"embeddings"``.
    """
    # Nothing to embed: return before any HTTP machinery is set up.
    if not texts:
        return []

    # Strip trailing slashes so "http://host:11434/" does not produce
    # "http://host:11434//api/embeddings".
    url = f"{base.rstrip('/')}/api/embeddings"
    out = []
    # A single session lets the whole batch reuse one HTTP connection.
    with requests.Session() as session:
        # Some Ollama models accept a single prompt; do one-by-one for reliability
        for t in texts:
            r = session.post(
                url,
                json={"model": model, "prompt": t},
                timeout=timeout,
            )
            r.raise_for_status()
            data = r.json()
            if "embedding" in data:
                out.append(data["embedding"])  # single vector
            elif "embeddings" in data:
                out.extend(data["embeddings"])  # multiple vectors
            else:
                raise RuntimeError("Embedding response missing 'embedding(s)'")
    return out
def main():
    """Parse CLI options, embed the requested text(s), and print JSON.

    Texts come from ``--text`` when it is non-empty, otherwise from stdin
    when ``--stdin`` is set (one text per non-blank line). If neither is
    supplied, argparse reports a usage error and exits. When exactly one
    text was embedded and a vector came back, that vector is printed on
    its own; otherwise the full list of vectors is printed.
    """
    default_model = os.environ.get("EMBED_MODEL", "bge-m3:latest")
    default_base = os.environ.get("OLLAMA_BASE", "http://localhost:11434")

    parser = argparse.ArgumentParser()
    parser.add_argument("--model", default=default_model)
    parser.add_argument("--text", help="Text to embed; if omitted, use --stdin")
    parser.add_argument(
        "--stdin",
        action="store_true",
        help="Read texts from stdin (one per line)",
    )
    parser.add_argument("--base", default=default_base)
    opts = parser.parse_args()

    inputs = []
    if not opts.text and not opts.stdin:
        # parser.error prints the usage message and terminates the process.
        parser.error("Provide --text or --stdin")
    elif opts.text:
        inputs.append(opts.text)
    else:
        # Keep only lines with visible content; drop just the newline.
        for raw in sys.stdin:
            if raw.strip():
                inputs.append(raw.rstrip("\n"))

    vectors = embed_ollama(inputs, opts.model, opts.base)

    # A lone text is emitted as a flat vector rather than a one-item batch.
    unwrap = len(inputs) == 1 and bool(vectors)
    payload = vectors[0] if unwrap else vectors
    print(json.dumps(payload))


if __name__ == "__main__":
    main()