spin_paper/archive/experimental-scripts/get_si_constant.py

276 lines
8.6 KiB
Python

#!/usr/bin/env python3
"""
get_si_constant.py
Fetches SI constants from https://si-digital-framework.org/constants
Saves as constants.ttl and outputs as JSON.
Usage:
python get_si_constant.py # All constants
python get_si_constant.py planck # Constants matching "planck"
python get_si_constant.py --help # Show help
Part of the ΞSUS project: https://esus.name
Repository: https://git.esus.name/esus/spin_paper/
License: CC BY-SA 4.0
"""
import os
import sys
import json
import requests
import argparse
from typing import Dict, Any, Optional
def fetch_constants() -> str:
    """Return the path of the local Turtle cache, downloading it on first use.

    The cache is the file ``constants.ttl`` in the current working directory.
    If it already exists it is reused unchanged and no network request is
    made; otherwise the document is fetched from
    https://si-digital-framework.org/constants and stored there.

    Returns:
        The cache file name (``"constants.ttl"``).

    Raises:
        SystemExit: with status 1 when the HTTP request fails.
    """
    ttl_file = "constants.ttl"
    if os.path.exists(ttl_file):
        print(f"Using cached {ttl_file}", file=sys.stderr)
        return ttl_file

    print("Fetching constants from https://si-digital-framework.org/constants...", file=sys.stderr)
    try:
        # Set proper headers as seen in the request
        headers = {
            'User-Agent': 'ΞSUS-project/1.0 (https://esus.name)',
            'Accept': 'text/turtle, application/rdf+xml, */*'
        }
        response = requests.get(
            "https://si-digital-framework.org/constants",
            headers=headers,
            timeout=30
        )
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Error fetching constants: {e}", file=sys.stderr)
        sys.exit(1)

    # For text/* responses without an explicit charset, requests decodes
    # ``.text`` as ISO-8859-1, which would garble non-ASCII characters.
    # Turtle documents are UTF-8, so use that unless the server says otherwise.
    if "charset" not in response.headers.get("Content-Type", "").lower():
        response.encoding = "utf-8"
    encoded = response.text.encode("utf-8")

    # Write to a temporary file and rename it into place so that an
    # interrupted write never leaves a truncated cache behind that later runs
    # would silently reuse.
    tmp_file = ttl_file + ".tmp"
    with open(tmp_file, 'wb') as f:
        f.write(encoded)
    os.replace(tmp_file, ttl_file)

    # Report the encoded size: this is the real byte count, not a character count.
    print(f"Saved {len(encoded)} bytes to {ttl_file}", file=sys.stderr)
    return ttl_file
def parse_ttl_to_json(ttl_file: str, filter_name: Optional[str] = None) -> Dict[str, Any]:
    """Parse a Turtle file into a JSON-serialisable dict of constants.

    Uses ``parse_with_rdflib`` when rdflib can be imported and falls back to
    ``parse_ttl_basic`` otherwise.

    Args:
        ttl_file: Path of the Turtle file to read.
        filter_name: Optional filter string handed on to the parser.

    Returns:
        The dict produced by whichever parser was used.

    Raises:
        SystemExit: with status 1 when parsing raises any exception.
    """
    try:
        # Probe for rdflib on its own. Keeping the parse call outside this
        # inner try means an ImportError raised *during* parsing is reported
        # as a parse error instead of being mistaken for "rdflib missing" and
        # silently downgraded to the basic parser.
        try:
            from rdflib import Graph  # noqa: F401 - availability probe only
        except ImportError:
            print("rdflib not available, using basic TTL parsing", file=sys.stderr)
            return parse_ttl_basic(ttl_file, filter_name)
        return parse_with_rdflib(ttl_file, filter_name)
    except Exception as e:
        print(f"Error parsing TTL file: {e}", file=sys.stderr)
        sys.exit(1)
def parse_with_rdflib(ttl_file: str, filter_name: Optional[str] = None) -> Dict[str, Any]:
    """Parse a Turtle file with rdflib and group its triples by subject.

    Each subject becomes one entry keyed by the local name of its IRI (the
    text after the last ``/`` or ``#``). The entry holds the full IRI under
    ``'uri'`` plus one string value per predicate, keyed by the predicate's
    local name.

    Args:
        ttl_file: Path of the Turtle file to read.
        filter_name: Optional filter string handed to ``filter_constants``.

    Returns:
        The result of ``filter_constants`` applied to the collected entries.
    """
    from rdflib import Graph

    g = Graph()
    g.parse(ttl_file, format="turtle")

    constants = {}
    # g.subjects() yields a subject once per triple; the set removes repeats.
    for subject in set(g.subjects()):
        subj_str = str(subject)
        const_id = _local_name(subj_str)

        # Skip if it's just a namespace or empty
        if not const_id or const_id in ('http', 'https'):
            continue

        const_data = {'uri': subj_str}
        # NOTE: when a predicate occurs more than once for a subject, only
        # one of its values is kept (later assignments overwrite earlier ones).
        for predicate, obj in g.predicate_objects(subject):
            const_data[_local_name(str(predicate))] = str(obj)
        constants[const_id] = const_data

    return filter_constants(constants, filter_name)


def _local_name(uri: str) -> str:
    """Return the text after the last ``/`` or ``#`` in *uri*.

    Cutting at whichever separator comes last handles hash namespaces
    (``.../rdf-schema#label`` -> ``label``) as well as slash namespaces.
    A string containing neither separator is returned unchanged.
    """
    return uri[max(uri.rfind('/'), uri.rfind('#')) + 1:]
def parse_ttl_basic(ttl_file: str, filter_name: Optional[str] = None) -> Dict[str, Any]:
    """Line-based fallback Turtle reader used when rdflib is unavailable.

    This is a deliberately simplified scan, not a Turtle parser: a line whose
    stripped text starts with ``<`` and contains ``>`` is treated as a subject
    line, and every later line containing ``:`` is stored verbatim under that
    subject's ``'raw_data'`` list. Blank lines, ``#`` comments and ``@``
    directives are ignored.

    Args:
        ttl_file: Path of the Turtle file to read (opened as UTF-8).
        filter_name: Optional filter string handed to ``filter_constants``.

    Returns:
        The result of ``filter_constants`` applied to the collected entries.
        Each entry has ``'uri'``, ``'raw_line'`` and, when property lines
        followed it, ``'raw_data'``.
    """
    constants = {}
    current_subject = None

    with open(ttl_file, 'r', encoding='utf-8') as f:
        for raw in f:
            line = raw.strip()
            if not line or line.startswith(('#', '@')):
                continue

            # Look for subject lines (simplified)
            if line.startswith('<') and '>' in line:
                uri = line[1:line.index('>')]
                # Local name = text after the last '/' or '#', so that hash
                # namespaces (".../ns#Name") give "Name" rather than "ns#Name".
                const_id = uri[max(uri.rfind('/'), uri.rfind('#')) + 1:]

                if not const_id:
                    # Bare namespace IRI: there is no constant to attach the
                    # following property lines to, so drop the current
                    # subject instead of misattributing them to the previous one.
                    current_subject = None
                    continue

                if const_id not in constants:
                    constants[const_id] = {'uri': uri, 'raw_line': line}
                # Also switch when the subject was seen before, so its
                # additional property lines accumulate on the right entry.
                current_subject = const_id

            # Store additional properties (very basic)
            elif current_subject and ':' in line:
                constants[current_subject].setdefault('raw_data', []).append(line)

    return filter_constants(constants, filter_name)
def filter_constants(constants: Dict[str, Any], filter_name: Optional[str] = None) -> Dict[str, Any]:
    """Keep only the constants that mention *filter_name* (case-insensitive).

    An entry is kept when the lowercased filter text occurs in its key, in
    any of its string property values, or in any string item of a list-valued
    property. Values of other types are ignored.

    Args:
        constants: Mapping of constant id to its property dict.
        filter_name: Substring to look for; when empty or ``None`` the input
            mapping itself is returned unchanged.

    Returns:
        A new dict with the matching entries in their original order, or
        *constants* itself when no filter is given.
    """
    if not filter_name:
        return constants

    needle = filter_name.lower()

    def mentions(value: Any) -> bool:
        # Strings are searched directly; lists are searched item by item.
        if isinstance(value, str):
            return needle in value.lower()
        if isinstance(value, list):
            return any(
                isinstance(entry, str) and needle in entry.lower()
                for entry in value
            )
        return False

    return {
        key: record
        for key, record in constants.items()
        if needle in key.lower() or any(mentions(v) for v in record.values())
    }
def print_installation_help():
    """Write the optional-dependency installation notes to stderr."""
    # The empty first and last items reproduce the blank line that precedes
    # and follows the notes in the printed output.
    help_lines = [
        "",
        "To get better TTL parsing, install rdflib:",
        "pip install rdflib",
        "Or using conda:",
        "conda install rdflib",
        "For the basic version, no additional dependencies are needed.",
        "",
    ]
    print("\n".join(help_lines), file=sys.stderr)
def main():
    """Command-line entry point.

    Parses the command line, then does one of three things:

    * ``--install-help``: print the dependency notes and stop.
    * ``--raw``: print the cached Turtle text to stdout and stop.
    * otherwise: print the (optionally filtered) constants as JSON on stdout
      and a one-line count on stderr.
    """
    cli = argparse.ArgumentParser(
        description="Fetch SI constants from digital framework and output as JSON",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
python get_si_constant.py # All constants
python get_si_constant.py planck # Constants matching "planck"
python get_si_constant.py electron # Constants matching "electron"
python get_si_constant.py hbar # Constants matching "hbar"
Part of the ΞSUS project: https://esus.name
Repository: https://git.esus.name/esus/spin_paper/
License: CC BY-SA 4.0
""",
    )
    cli.add_argument(
        'filter_name',
        nargs='?',
        help='Filter constants by name (case-insensitive substring match)',
    )
    cli.add_argument(
        '--indent',
        type=int,
        default=2,
        help='JSON indentation (default: 2)',
    )
    cli.add_argument(
        '--install-help',
        action='store_true',
        help='Show installation help for dependencies',
    )
    cli.add_argument(
        '--raw',
        action='store_true',
        help='Output raw TTL content instead of parsed JSON',
    )
    opts = cli.parse_args()

    # Dependency notes need neither the cache nor the network.
    if opts.install_help:
        print_installation_help()
        return

    cache_path = fetch_constants()

    # Raw mode: dump the cached Turtle text as-is.
    if opts.raw:
        with open(cache_path, 'r', encoding='utf-8') as handle:
            raw_text = handle.read()
        print(raw_text)
        return

    # JSON goes to stdout; the summary goes to stderr so it does not end up
    # in redirected JSON output.
    result = parse_ttl_to_json(cache_path, opts.filter_name)
    print(json.dumps(result, indent=opts.indent, ensure_ascii=False))

    if opts.filter_name:
        summary = f"Found {len(result)} constants matching '{opts.filter_name}'"
    else:
        summary = f"Total constants: {len(result)}"
    print(summary, file=sys.stderr)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()