spin_paper/archive/experimental-scripts/simple_nist_test.py

240 lines
8.7 KiB
Python

#!/usr/bin/env python3
"""
Fixed NIST constant fetcher that handles:
1. HTTP 403 by using proper browser headers
2. Placeholder dots (...) in values
"""
import re
import urllib.request
def clean_nist_value(value_str):
    """
    Clean a NIST constant value string for conversion to Decimal/float.

    NIST values might have:
    - Scientific notation: "1.23456789e-34"
    - Uncertainty notation: "1.23456789(45)e-34"
    - Exact notation: "299792458" (no uncertainty)
    - Spacing issues: " 1.23456789e-34 "
    - Internal spaces: "1.660 539 068 92 e-27"
    - Placeholder dots: "1.054 571 817... e-34" (ASCII dots or a Unicode
      ellipsis character)

    Args:
        value_str: The raw value text. Non-string inputs are passed through
            ``str()`` unchanged and are not validated.

    Returns:
        A clean string suitable for Decimal() or float() conversion.

    Raises:
        ValueError: If the cleaned text still cannot be parsed by ``float()``.
            The original parsing error is attached as ``__cause__``.
    """
    if not isinstance(value_str, str):
        return str(value_str)

    # Collapse whitespace first: digit groups ("1.660 539") are joined and an
    # uncertainty written with inner spaces ("( 45 )") becomes "(45)" so the
    # next substitution can recognise it.
    clean = re.sub(r'\s+', '', value_str)

    # Remove uncertainty parentheses: "1.23456(78)" -> "1.23456"
    clean = re.sub(r'\([0-9]+\)', '', clean)

    # Remove placeholder dots: "1.054571817...e-34" -> "1.054571817e-34".
    # Three or more ASCII dots, or the single-character Unicode ellipsis.
    clean = re.sub(r'\.{3,}|\u2026', '', clean)

    # Remove a single trailing dot if one is left over
    if clean.endswith('.'):
        clean = clean[:-1]

    # Validate that the result looks like a number
    try:
        float(clean)
    except ValueError as e:
        raise ValueError(
            f"Could not clean NIST value '{value_str}' -> '{clean}': {e}"
        ) from e
    return clean
def fetch_nist_with_proper_headers():
    """
    Fetch the NIST constants ASCII table using browser-like request headers.

    The request advertises ``Accept-Encoding: gzip, deflate``. urllib does not
    undo a content encoding on its own, so the body is decompressed here
    according to the response's ``Content-Encoding`` header before it is
    decoded to text.

    Returns:
        The table as a ``str`` on success, or ``None`` if the request or the
        decoding failed (a diagnostic message is printed in that case).
    """
    # Imported locally so this function does not depend on urllib.request
    # happening to import urllib.error as a side effect.
    import gzip
    import urllib.error
    import zlib

    nist_url = "https://physics.nist.gov/cuu/Constants/Table/allascii.txt"
    # Create request with browser-like headers
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Accept': 'text/plain,text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'gzip, deflate',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
    }
    try:
        req = urllib.request.Request(nist_url, headers=headers)
        # Open with longer timeout
        with urllib.request.urlopen(req, timeout=30) as response:
            raw = response.read()
            content_encoding = (response.headers.get('Content-Encoding') or '').strip().lower()
            charset = response.headers.get_content_charset() or 'utf-8'

        # Undo whichever compression the server chose from our Accept-Encoding.
        if content_encoding in ('gzip', 'x-gzip'):
            raw = gzip.decompress(raw)
        elif content_encoding == 'deflate':
            try:
                raw = zlib.decompress(raw)
            except zlib.error:
                # Some servers send a raw deflate stream without the zlib header.
                raw = zlib.decompress(raw, -zlib.MAX_WBITS)

        nist_data = raw.decode(charset)
        print("✓ Successfully fetched NIST constants with browser headers")
        return nist_data
    except urllib.error.HTTPError as e:
        # HTTPError is a subclass of URLError, so it must be handled first.
        print(f"❌ HTTP Error {e.code}: {e.reason}")
        print(f" URL: {nist_url}")
        print(f" This might be a temporary server issue or blocking policy")
        return None
    except urllib.error.URLError as e:
        print(f"❌ URL Error: {e.reason}")
        return None
    except Exception as e:
        # Top-level boundary for this best-effort fetch: report and give up.
        print(f"❌ Unexpected error fetching NIST data: {e}")
        return None
def test_local_parsing_with_dots():
    """
    Test parsing of sample table lines, including values with placeholder dots.

    Each sample line is split into columns on runs of two or more spaces; the
    second column is cleaned with ``clean_nist_value`` and converted to float.

    Returns:
        True if every sample line was split and converted successfully,
        otherwise False.
    """
    print("Testing local parsing with dot-containing examples...")
    print("=" * 60)

    # The parser splits columns on runs of 2+ spaces. The sample lines are
    # assembled from their columns with an explicit gap so the separators
    # cannot be lost if this file's whitespace is ever normalised.
    column_gap = " " * 4
    sample_columns = [
        ("Planck constant", "6.626 070 15 e-34", "(exact)", "J Hz^-1"),
        ("reduced Planck constant", "1.054 571 817... e-34", "(exact)", "J s"),
        ("speed of light in vacuum", "299 792 458", "(exact)", "m s^-1"),
        ("elementary charge", "1.602 176 634 e-19", "(exact)", "C"),
        ("electron mass", "9.109 383 701 5 e-31", "0.000 000 000 28 e-31", "kg"),
        ("fine-structure constant", "7.297 352 566... e-3", "0.000 000 001 1 e-3"),
    ]
    test_lines = [column_gap.join(columns) for columns in sample_columns]

    success_count = 0
    for line in test_lines:
        print(f"\nInput: {line}")
        # Split by 2+ spaces
        sections = re.split(r' {2,}', line)
        print(f"Sections: {sections}")

        if len(sections) < 2:
            # Report the failure instead of skipping the line silently.
            print(" ❌ Could not split line into quantity and value columns")
            continue

        quantity = sections[0].strip()
        value_part = sections[1].strip()
        print(f" Quantity: '{quantity}'")
        print(f" Raw value: '{value_part}'")
        try:
            clean_value = clean_nist_value(value_part)
            print(f" Clean value: '{clean_value}'")
            float_val = float(clean_value)
            print(f" ✓ Converted: {float_val:.3e}")
            success_count += 1
        except ValueError as e:
            print(f" ❌ Conversion failed: {e}")

    print(f"\nSuccess rate: {success_count}/{len(test_lines)} ({success_count/len(test_lines)*100:.1f}%)")
    return success_count == len(test_lines)
def test_real_nist_fetch():
    """
    Fetch the live NIST table and check that key constants parse correctly.

    For each target name, every table line containing that name is treated as
    a candidate. Candidates are tried in file order until one parses to a value
    inside the expected range. Other constants whose names merely contain the
    target text (for example ratio entries) are skipped, because their values
    fall outside the range.

    Returns:
        False if the table could not be fetched; otherwise True when at least
        80% of the target constants were found with an in-range value.
    """
    print("\n" + "=" * 60)
    print("Testing real NIST fetch with browser headers...")
    nist_data = fetch_nist_with_proper_headers()
    if not nist_data:
        print("❌ Could not fetch NIST data")
        return False

    lines = nist_data.splitlines()
    print(f"✓ Fetched {len(lines)} lines from NIST")

    # (name to search for, minimum accepted value, maximum accepted value)
    target_constants = [
        ("Planck constant", 6e-34, 7e-34),
        ("reduced Planck constant", 1e-34, 1.1e-34),
        ("electron mass", 9e-31, 9.2e-31),
        ("elementary charge", 1.6e-19, 1.7e-19),
        ("speed of light", 2.9e8, 3.1e8),
        ("Bohr radius", 5e-11, 6e-11),
        ("fine-structure constant", 7e-3, 8e-3),
    ]
    print(f"\nSearching for target constants...")
    found_count = 0
    for target, min_val, max_val in target_constants:
        print(f"\nLooking for: {target}")
        needle = target.lower()
        found = False
        for line in lines:
            stripped = line.strip()
            if needle not in stripped.lower() or stripped.startswith('Quantity'):
                continue
            print(f" Found: {stripped}")

            sections = re.split(r' {2,}', stripped)
            if len(sections) < 2:
                continue
            value_part = sections[1].strip()
            try:
                clean_value = clean_nist_value(value_part)
                float_val = float(clean_value)
            except ValueError as e:
                print(f" ❌ Parse failed: {e}")
                continue
            print(f" ✓ Parsed: '{value_part}' -> '{clean_value}' -> {float_val:.3e}")

            # Validate the value is in expected range
            if min_val <= float_val <= max_val:
                print(f" ✓ Value in expected range [{min_val:.1e}, {max_val:.1e}]")
                found = True
                found_count += 1
                # Count each target once and stop at the first valid match.
                break
            # Out of range: probably a different constant sharing the name
            # fragment, so keep looking at later lines.
            print(f" ⚠ Value outside expected range [{min_val:.1e}, {max_val:.1e}]")
        if not found:
            print(f" ❌ Not found or couldn't parse correctly")

    print(f"\nSummary: Found {found_count}/{len(target_constants)} constants successfully")
    return found_count >= len(target_constants) * 0.8  # 80% success rate
def main():
    """Run the local parsing test and the live fetch test, then print a summary."""
    divider = "=" * 60

    def verdict(passed):
        # Label used on the per-test result lines.
        return '✓ PASS' if passed else '❌ FAIL'

    print("IMPROVED NIST CONSTANT PARSER TEST")
    print(divider)

    # Test 1: parsing of built-in sample lines
    print("TEST 1: Local parsing with improved dot handling")
    parsing_ok = test_local_parsing_with_dots()

    # Test 2: download and parse the live table
    print("\nTEST 2: Real NIST fetch with browser headers")
    fetching_ok = test_real_nist_fetch()

    print("\n" + divider)
    print("FINAL RESULTS:")
    print(f" Local parsing: {verdict(parsing_ok)}")
    print(f" NIST fetching: {verdict(fetching_ok)}")

    if parsing_ok and fetching_ok:
        print("\n🎉 All tests passed! The parser should work correctly now.")
        print("You can now run the enhanced precision verification script.")
        return

    # At least one test failed: point at whichever part needs attention.
    print("\n⚠ Some tests failed. Check the issues above.")
    if not parsing_ok:
        print(" - Local parsing needs more work")
    if not fetching_ok:
        print(" - NIST fetching still has issues (may need to use fallback constants)")
# Run the checks only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()