240 lines
8.7 KiB
Python
240 lines
8.7 KiB
Python
#!/usr/bin/env python3
|
|
"""
Fixed NIST constant fetcher that handles:
1. HTTP 403 responses, by sending proper browser-like request headers
2. Placeholder dots (...) in values
"""
|
|
|
|
import gzip
import re
import urllib.error
import urllib.request
import zlib
|
|
|
|
def clean_nist_value(value_str):
    """
    Clean a NIST constant value string for conversion to Decimal/float.

    NIST values might have:
    - Scientific notation: "1.23456789e-34"
    - Uncertainty notation: "1.23456789(45)e-34"
    - Exact notation: "299792458" (no uncertainty)
    - Spacing issues: " 1.23456789e-34 "
    - Internal spaces: "1.660 539 068 92 e-27"
    - Placeholder dots: "1.054 571 817... e-34"

    Args:
        value_str: Raw text of a value. Anything that is not a ``str`` is
            returned as ``str(value_str)`` without cleaning or validation.

    Returns:
        The cleaned string; for string inputs it has been checked to be
        accepted by ``float()``.

    Raises:
        ValueError: If the cleaned text still cannot be parsed by ``float()``.
            The underlying conversion error is attached as ``__cause__``.
    """
    if not isinstance(value_str, str):
        return str(value_str)

    # Remove leading/trailing whitespace
    clean = value_str.strip()

    # Remove uncertainty parentheses: "1.23456(78)" -> "1.23456"
    clean = re.sub(r'\([0-9]+\)', '', clean)

    # Remove ALL internal whitespace (NIST groups digits with spaces)
    clean = re.sub(r'\s+', '', clean)

    # Remove placeholder dots: "1.054571817...e-34" -> "1.054571817e-34"
    clean = re.sub(r'\.\.\.+', '', clean)

    # A lone trailing dot ("123.") is dropped as well
    if clean.endswith('.'):
        clean = clean[:-1]

    # Validate that the result really looks like a number
    try:
        float(clean)
    except ValueError as e:
        # Chain explicitly so the traceback shows the conversion failure as
        # the direct cause rather than as incidental context.
        raise ValueError(
            f"Could not clean NIST value '{value_str}' -> '{clean}': {e}"
        ) from e
    return clean
|
|
|
|
|
|
def _decompress_body(raw, content_encoding):
    """Undo the HTTP Content-Encoding applied to *raw* bytes, if any."""
    encoding = (content_encoding or "").strip().lower()
    if encoding in ("gzip", "x-gzip"):
        return gzip.decompress(raw)
    if encoding == "deflate":
        try:
            return zlib.decompress(raw)
        except zlib.error:
            # Some servers send a raw DEFLATE stream without the zlib wrapper.
            return zlib.decompress(raw, -zlib.MAX_WBITS)
    return raw


def fetch_nist_with_proper_headers(
    nist_url="https://physics.nist.gov/cuu/Constants/Table/allascii.txt",
    timeout=30,
):
    """
    Fetch NIST constants with proper browser headers to avoid 403 Forbidden.

    Args:
        nist_url: URL to download. Defaults to the NIST "allascii.txt" table.
        timeout: Timeout in seconds passed to ``urllib.request.urlopen``.

    Returns:
        The response body decoded as UTF-8 text, or ``None`` if the request,
        decompression or decoding failed (a diagnostic is printed instead of
        raising).
    """
    # Create request with browser-like headers
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Accept': 'text/plain,text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'gzip, deflate',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
    }

    try:
        # Create request object with headers
        req = urllib.request.Request(nist_url, headers=headers)

        # Open with longer timeout
        with urllib.request.urlopen(req, timeout=timeout) as response:
            raw = response.read()
            content_encoding = response.headers.get('Content-Encoding')

        # urllib does not transparently decompress responses; because we
        # advertise gzip/deflate above, we must undo the encoding ourselves
        # before decoding the bytes as text.
        nist_data = _decompress_body(raw, content_encoding).decode('utf-8')

        print("✓ Successfully fetched NIST constants with browser headers")
        return nist_data

    except urllib.error.HTTPError as e:
        # Must precede URLError: HTTPError is a subclass of it.
        print(f"❌ HTTP Error {e.code}: {e.reason}")
        print(f" URL: {nist_url}")
        print(" This might be a temporary server issue or blocking policy")
        return None

    except urllib.error.URLError as e:
        print(f"❌ URL Error: {e.reason}")
        return None

    except Exception as e:
        # Boundary handler: malformed URLs, bad compressed data and decode
        # errors are all reported the same way and turned into None.
        print(f"❌ Unexpected error fetching NIST data: {e}")
        return None
|
|
|
|
|
|
def test_local_parsing_with_dots():
    """
    Test parsing with examples that include placeholder dots.

    Each sample line is split into columns on runs of two or more spaces,
    the second column is cleaned with ``clean_nist_value`` and converted with
    ``float``. Progress is printed for every line.

    Returns:
        True if every sample line was split and converted successfully,
        False otherwise.
    """
    print("Testing local parsing with dot-containing examples...")
    print("=" * 60)

    # Include examples with dots. Columns MUST be separated by at least two
    # spaces: the splitter below treats single spaces as part of a column
    # (digit grouping), so single-spaced lines would never yield a value.
    test_lines = [
        "Planck constant    6.626 070 15 e-34    (exact)    J Hz^-1",
        "reduced Planck constant    1.054 571 817... e-34    (exact)    J s",
        "speed of light in vacuum    299 792 458    (exact)    m s^-1",
        "elementary charge    1.602 176 634 e-19    (exact)    C",
        "electron mass    9.109 383 701 5 e-31    0.000 000 000 28 e-31    kg",
        "fine-structure constant    7.297 352 566... e-3    0.000 000 001 1 e-3",
    ]

    success_count = 0

    for line in test_lines:
        print(f"\nInput: {line}")

        # Split by 2+ spaces
        sections = re.split(r' {2,}', line)
        print(f"Sections: {sections}")

        if len(sections) < 2:
            # Report instead of silently skipping, so a bad fixture is visible.
            print(" ❌ Conversion failed: no value column found")
            continue

        # Extract parts
        quantity = sections[0].strip()
        value_part = sections[1].strip()

        print(f" Quantity: '{quantity}'")
        print(f" Raw value: '{value_part}'")

        try:
            # Clean the value using our improved function
            clean_value = clean_nist_value(value_part)
            print(f" Clean value: '{clean_value}'")

            float_val = float(clean_value)
            print(f" ✓ Converted: {float_val:.3e}")
            success_count += 1

        except Exception as e:
            print(f" ❌ Conversion failed: {e}")

    print(f"\nSuccess rate: {success_count}/{len(test_lines)} ({success_count/len(test_lines)*100:.1f}%)")
    return success_count == len(test_lines)
|
|
|
|
|
|
def test_real_nist_fetch():
    """
    Test fetching real NIST data with proper headers.

    Downloads the table via ``fetch_nist_with_proper_headers`` and, for each
    target constant, scans the lines mentioning its name until one parses to
    a value inside the expected range. Progress is printed along the way.

    Returns:
        False if the download failed; otherwise True when at least 80% of
        the target constants were found with an in-range value.
    """
    print("\n" + "=" * 60)
    print("Testing real NIST fetch with browser headers...")

    nist_data = fetch_nist_with_proper_headers()

    if not nist_data:
        print("❌ Could not fetch NIST data")
        return False

    lines = nist_data.split('\n')
    print(f"✓ Fetched {len(lines)} lines from NIST")

    # Look for specific constants: (name substring, min value, max value)
    target_constants = [
        ("Planck constant", 6e-34, 7e-34),
        ("reduced Planck constant", 1e-34, 1.1e-34),
        ("electron mass", 9e-31, 9.2e-31),
        ("elementary charge", 1.6e-19, 1.7e-19),
        ("speed of light", 2.9e8, 3.1e8),
        ("Bohr radius", 5e-11, 6e-11),
        ("fine-structure constant", 7e-3, 8e-3),
    ]

    print("\nSearching for target constants...")
    found_count = 0

    for target, min_val, max_val in target_constants:
        print(f"\nLooking for: {target}")
        needle = target.lower()
        found = False

        for line in lines:
            stripped = line.strip()
            if needle not in stripped.lower() or stripped.startswith('Quantity'):
                continue

            print(f" Found: {stripped}")

            # Test parsing this line
            sections = re.split(r' {2,}', stripped)
            if len(sections) < 2:
                continue
            value_part = sections[1].strip()

            try:
                clean_value = clean_nist_value(value_part)
                float_val = float(clean_value)
            except Exception as e:
                print(f" ❌ Parse failed: {e}")
                continue

            print(f" ✓ Parsed: '{value_part}' -> '{clean_value}' -> {float_val:.3e}")

            # Validate the value is in expected range
            if min_val <= float_val <= max_val:
                print(f" ✓ Value in expected range [{min_val:.1e}, {max_val:.1e}]")
                found = True
                found_count += 1
                break

            # The name is only a substring match, so an earlier, different
            # quantity (e.g. "molar Planck constant" for "Planck constant")
            # can match first. Keep scanning instead of giving up here.
            print(f" ⚠ Value outside expected range [{min_val:.1e}, {max_val:.1e}]")

        if not found:
            print(" ❌ Not found or couldn't parse correctly")

    print(f"\nSummary: Found {found_count}/{len(target_constants)} constants successfully")
    return found_count >= len(target_constants) * 0.8  # 80% success rate
|
|
|
|
|
|
def main():
    """Run the local parsing check and the live NIST fetch check, then print a summary."""
    separator = "=" * 60
    verdict = {True: '✓ PASS', False: '❌ FAIL'}

    print("IMPROVED NIST CONSTANT PARSER TEST")
    print(separator)

    # Test 1: Local parsing with dots
    print("TEST 1: Local parsing with improved dot handling")
    local_success = test_local_parsing_with_dots()

    # Test 2: Real NIST fetch with proper headers
    print("\nTEST 2: Real NIST fetch with browser headers")
    fetch_success = test_real_nist_fetch()

    # Summary
    print("\n" + separator)
    print("FINAL RESULTS:")
    print(f" Local parsing: {verdict[bool(local_success)]}")
    print(f" NIST fetching: {verdict[bool(fetch_success)]}")

    if local_success and fetch_success:
        print("\n🎉 All tests passed! The parser should work correctly now.")
        print("You can now run the enhanced precision verification script.")
        return

    # At least one check failed: say which.
    print("\n⚠ Some tests failed. Check the issues above.")
    if not local_success:
        print(" - Local parsing needs more work")
    if not fetch_success:
        print(" - NIST fetching still has issues (may need to use fallback constants)")
|
|
|
|
|
|
# Run the checks only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|