276 lines
8.6 KiB
Python
276 lines
8.6 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
get_si_constant.py
|
|
|
|
Fetches SI constants from https://si-digital-framework.org/constants
|
|
Saves as constants.ttl and outputs as JSON.
|
|
|
|
Usage:
|
|
python get_si_constant.py # All constants
|
|
python get_si_constant.py planck # Constants matching "planck"
|
|
python get_si_constant.py --help # Show help
|
|
|
|
Part of the ΞSUS project: https://esus.name
|
|
Repository: https://git.esus.name/esus/spin_paper/
|
|
License: CC BY-SA 4.0
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import json
|
|
import requests
|
|
import argparse
|
|
from typing import Dict, Any, Optional
|
|
|
|
def fetch_constants() -> str:
    """Return the path of the local Turtle file, downloading it on first use.

    ``constants.ttl`` in the current working directory acts as a cache: if it
    already exists it is reused unchanged, otherwise it is downloaded from
    https://si-digital-framework.org/constants.

    Returns:
        The path of the Turtle file (always ``"constants.ttl"``).

    Exits the process with status 1, after printing a message to stderr, when
    the HTTP request fails or the file cannot be written.
    """
    ttl_file = "constants.ttl"

    if os.path.exists(ttl_file):
        print(f"Using cached {ttl_file}", file=sys.stderr)
        return ttl_file

    print("Fetching constants from https://si-digital-framework.org/constants...", file=sys.stderr)

    # Ask for Turtle first; the other types are accepted as a fallback.
    headers = {
        'User-Agent': 'ΞSUS-project/1.0 (https://esus.name)',
        'Accept': 'text/turtle, application/rdf+xml, */*'
    }

    try:
        response = requests.get(
            "https://si-digital-framework.org/constants",
            headers=headers,
            timeout=30
        )
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Error fetching constants: {e}", file=sys.stderr)
        sys.exit(1)

    # Store the body exactly as received.  ``response.text`` would be decoded
    # with requests' ISO-8859-1 fallback when a text/* response declares no
    # charset, which corrupts non-ASCII characters once re-encoded as UTF-8.
    payload = response.content

    # Write to a temporary name and rename afterwards, so an interrupted write
    # can never leave a truncated file that later runs would treat as a valid
    # cache.
    tmp_file = ttl_file + ".tmp"
    try:
        with open(tmp_file, 'wb') as f:
            f.write(payload)
        os.replace(tmp_file, ttl_file)
    except OSError as e:
        print(f"Error writing {ttl_file}: {e}", file=sys.stderr)
        sys.exit(1)

    print(f"Saved {len(payload)} bytes to {ttl_file}", file=sys.stderr)
    return ttl_file
|
|
|
|
def parse_ttl_to_json(ttl_file: str, filter_name: Optional[str] = None) -> Dict[str, Any]:
    """Turn a Turtle file into a JSON-serialisable dict of constants.

    rdflib is used when it can be imported; on ImportError a notice is printed
    to stderr and the built-in line-based parser is used instead.

    Args:
        ttl_file: Path of the Turtle file to read.
        filter_name: Optional substring passed on to the parser for filtering.

    Returns:
        The dict produced by whichever parser ran.

    Any other exception raised while parsing is reported on stderr and ends
    the process with exit status 1.
    """
    try:
        try:
            # Imported only to find out whether rdflib is installed.
            from rdflib import Graph  # noqa: F401
            parsed = parse_with_rdflib(ttl_file, filter_name)
        except ImportError:
            print("rdflib not available, using basic TTL parsing", file=sys.stderr)
            parsed = parse_ttl_basic(ttl_file, filter_name)
    except Exception as err:
        print(f"Error parsing TTL file: {err}", file=sys.stderr)
        sys.exit(1)

    return parsed
|
|
|
|
def parse_with_rdflib(ttl_file: str, filter_name: Optional[str] = None) -> Dict[str, Any]:
    """Parse a Turtle file with rdflib into one record per subject.

    Each subject becomes an entry keyed by the local part of its URI (the text
    after the last '/' or '#').  A record holds the full URI under ``'uri'``
    plus one key per predicate local name.  A predicate with a single object
    maps to a string; a predicate with several objects maps to a sorted list
    of strings, so no value is dropped.

    Args:
        ttl_file: Path of the Turtle file to parse.
        filter_name: Optional case-insensitive substring; forwarded to
            filter_constants().

    Returns:
        The records, narrowed by filter_constants().

    Raises:
        ImportError: If rdflib is not installed.
    """
    from rdflib import Graph

    def local_name(uri: str) -> str:
        # Cut after whichever of '/' and '#' occurs last.  Testing '/' first
        # (as an if/elif chain would) never reaches the '#' case for http
        # URIs, leaving names such as "22-rdf-syntax-ns#type".  If neither
        # separator is present both rfind() calls give -1 and the whole
        # string is returned.
        return uri[max(uri.rfind('/'), uri.rfind('#')) + 1:]

    g = Graph()
    g.parse(ttl_file, format="turtle")

    constants: Dict[str, Any] = {}

    # Sorted so the output order is the same on every run.  Subjects that
    # share a local name still replace one another; the last one in sort
    # order wins.
    for subject in sorted(set(g.subjects()), key=str):
        subj_str = str(subject)
        const_id = local_name(subj_str)

        # Skip subjects whose URI ends in a separator, or bare scheme names.
        if not const_id or const_id in ('http', 'https'):
            continue

        # Collect every object per property name before flattening.
        collected: Dict[str, list] = {}
        for predicate, obj in g.predicate_objects(subject):
            collected.setdefault(local_name(str(predicate)), []).append(str(obj))

        const_data: Dict[str, Any] = {'uri': subj_str}
        for prop_name in sorted(collected):
            values = sorted(collected[prop_name])
            const_data[prop_name] = values[0] if len(values) == 1 else values

        constants[const_id] = const_data

    return filter_constants(constants, filter_name)
|
|
|
|
def parse_ttl_basic(ttl_file: str, filter_name: Optional[str] = None) -> Dict[str, Any]:
    """Line-based fallback Turtle reader used when rdflib is unavailable.

    This is not a real Turtle parser.  Only lines that start with ``<...>``
    are recognised as subjects, and following lines that contain a ':' are
    stored verbatim under ``'raw_data'`` of the most recent subject.  Blank
    lines, '#' comments and '@' directives are ignored.

    Args:
        ttl_file: Path of the Turtle file to read (decoded as UTF-8).
        filter_name: Optional case-insensitive substring; forwarded to
            filter_constants().

    Returns:
        A dict keyed by the local part of each subject URI, narrowed by
        filter_constants().  Each record has ``'uri'`` and ``'raw_line'``,
        plus ``'raw_data'`` (a list of lines) when property lines followed.
    """
    constants: Dict[str, Any] = {}
    current_subject = None

    with open(ttl_file, 'r', encoding='utf-8') as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line or line.startswith(('#', '@')):
                continue

            if line.startswith('<') and '>' in line:
                uri = line[1:line.index('>')]
                # Local name = text after the last '/' or '#', whichever
                # comes later.  Testing '/' first would never reach '#' for
                # http URIs.
                const_id = uri[max(uri.rfind('/'), uri.rfind('#')) + 1:]

                if const_id:
                    # Keep the first record for an id, but always make it
                    # the current subject, so property lines after a
                    # repeated subject are not attached to the previous one.
                    constants.setdefault(const_id, {'uri': uri, 'raw_line': line})
                    current_subject = const_id
                else:
                    # No usable id: drop the lines that follow rather than
                    # attach them to an unrelated subject.
                    current_subject = None

            elif current_subject and ':' in line:
                constants[current_subject].setdefault('raw_data', []).append(line)

    return filter_constants(constants, filter_name)
|
|
|
|
def filter_constants(constants: Dict[str, Any], filter_name: Optional[str] = None) -> Dict[str, Any]:
    """Keep only the constants that mention ``filter_name``.

    The comparison is a case-insensitive substring test against the constant
    id, against every string property value, and against every string inside
    list-valued properties.  Values of any other type are ignored.

    Args:
        constants: Mapping of constant id to its property dict.
        filter_name: Substring to look for; when empty or None the input
            mapping itself is returned unchanged.

    Returns:
        A new dict holding the matching entries, or ``constants`` itself when
        no filter is given.
    """
    if not filter_name:
        return constants

    needle = filter_name.lower()

    def mentions(value: Any) -> bool:
        # Only strings, and strings nested one level deep in lists, count.
        if isinstance(value, str):
            return needle in value.lower()
        if isinstance(value, list):
            return any(isinstance(entry, str) and needle in entry.lower() for entry in value)
        return False

    return {
        key: record
        for key, record in constants.items()
        if needle in key.lower() or any(mentions(value) for value in record.values())
    }
|
|
|
|
def print_installation_help():
    """Write instructions for installing the optional rdflib dependency to stderr."""
    instructions = """
To get better TTL parsing, install rdflib:

pip install rdflib

Or using conda:

conda install rdflib

For the basic version, no additional dependencies are needed.
"""
    print(instructions, file=sys.stderr)
|
|
|
|
def main():
    """Command-line entry point: fetch the constants and print them.

    Parsed JSON goes to stdout (or the raw Turtle text with ``--raw``);
    progress and summary messages go to stderr.
    """
    usage_examples = """
Examples:
python get_si_constant.py # All constants
python get_si_constant.py planck # Constants matching "planck"
python get_si_constant.py electron # Constants matching "electron"
python get_si_constant.py hbar # Constants matching "hbar"

Part of the ΞSUS project: https://esus.name
Repository: https://git.esus.name/esus/spin_paper/
License: CC BY-SA 4.0
"""

    cli = argparse.ArgumentParser(
        description="Fetch SI constants from digital framework and output as JSON",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )
    cli.add_argument(
        'filter_name',
        nargs='?',
        help='Filter constants by name (case-insensitive substring match)',
    )
    cli.add_argument('--indent', type=int, default=2, help='JSON indentation (default: 2)')

    # The two boolean switches differ only in name and help text.
    switches = (
        ('--install-help', 'Show installation help for dependencies'),
        ('--raw', 'Output raw TTL content instead of parsed JSON'),
    )
    for flag, description in switches:
        cli.add_argument(flag, action='store_true', help=description)

    opts = cli.parse_args()

    # Installation help needs neither the network nor the cache file.
    if opts.install_help:
        print_installation_help()
        return

    ttl_path = fetch_constants()

    if opts.raw:
        # Dump the Turtle file as-is; no parsing and no filtering.
        with open(ttl_path, 'r', encoding='utf-8') as ttl:
            raw_text = ttl.read()
        print(raw_text)
        return

    result = parse_ttl_to_json(ttl_path, opts.filter_name)
    print(json.dumps(result, indent=opts.indent, ensure_ascii=False))

    # The summary goes to stderr so stdout stays valid JSON.
    summary = (
        f"Found {len(result)} constants matching '{opts.filter_name}'"
        if opts.filter_name
        else f"Total constants: {len(result)}"
    )
    print(summary, file=sys.stderr)
|
|
|
|
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()