#!/usr/bin/env python3
"""
Fixed NIST constant fetcher that handles:
1. HTTP 403 by using proper browser headers
2. Placeholder dots (...) in values
"""

import gzip
import re
import urllib.error
import urllib.request
import zlib
from typing import Optional

# Patterns are compiled once because they are applied to every table line.
_UNCERTAINTY_RE = re.compile(r'\([0-9]+\)')
_WHITESPACE_RE = re.compile(r'\s+')
# Three or more ASCII dots, or a single Unicode ellipsis character.
_PLACEHOLDER_RE = re.compile(r'\.\.\.+|…')
# Table columns are assumed to be separated by runs of two or more spaces.
_COLUMN_SEPARATOR_RE = re.compile(r' {2,}')


def clean_nist_value(value_str) -> str:
    """
    Clean NIST constant value string for conversion to Decimal/float.

    NIST values might have:
    - Scientific notation: "1.23456789e-34"
    - Uncertainty notation: "1.23456789(45)e-34"
    - Exact notation: "299792458" (no uncertainty)
    - Spacing issues: " 1.23456789e-34 "
    - Internal spaces: "1.660 539 068 92 e-27"
    - Placeholder dots: "1.054 571 817... e-34"

    Args:
        value_str: Raw value text. Non-string inputs are returned as
            ``str(value_str)`` without any cleaning or validation.

    Returns:
        A clean string suitable for Decimal() or float() conversion.

    Raises:
        ValueError: If the cleaned text still cannot be parsed by float().
    """
    if not isinstance(value_str, str):
        return str(value_str)

    # Remove leading/trailing whitespace
    clean = value_str.strip()

    # Remove uncertainty parentheses: "1.23456(78)" -> "1.23456"
    clean = _UNCERTAINTY_RE.sub('', clean)

    # Remove ALL internal whitespace
    clean = _WHITESPACE_RE.sub('', clean)

    # Remove placeholder dots: "1.054571817...e-34" -> "1.054571817e-34"
    clean = _PLACEHOLDER_RE.sub('', clean)

    # Remove a single trailing dot if one exists
    if clean.endswith('.'):
        clean = clean[:-1]

    # Validate the result looks like a number
    try:
        float(clean)
    except ValueError as e:
        raise ValueError(
            f"Could not clean NIST value '{value_str}' -> '{clean}': {e}"
        ) from e
    return clean


def _decompress_body(raw: bytes, content_encoding: Optional[str]) -> bytes:
    """Undo the HTTP Content-Encoding (gzip or deflate) applied to *raw*."""
    encoding = (content_encoding or '').strip().lower()
    if encoding in ('gzip', 'x-gzip'):
        return gzip.decompress(raw)
    if encoding == 'deflate':
        try:
            return zlib.decompress(raw)
        except zlib.error:
            # Some servers send a raw DEFLATE stream without the zlib header.
            return zlib.decompress(raw, -zlib.MAX_WBITS)
    return raw


def fetch_nist_with_proper_headers() -> Optional[str]:
    """
    Fetch NIST constants with proper browser headers to avoid 403 Forbidden.

    Returns:
        The decoded text of the NIST ASCII table, or None if the download,
        decompression or decoding failed (the reason is printed).
    """
    nist_url = "https://physics.nist.gov/cuu/Constants/Table/allascii.txt"

    # Create request with browser-like headers
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Accept': 'text/plain,text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'gzip, deflate',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
    }

    try:
        # Create request object with headers
        req = urllib.request.Request(nist_url, headers=headers)

        # Open with longer timeout
        with urllib.request.urlopen(req, timeout=30) as response:
            raw_body = response.read()
            content_encoding = response.headers.get('Content-Encoding')

        # We advertise gzip/deflate above and urllib does not decompress
        # automatically, so the body must be decompressed before decoding.
        nist_data = _decompress_body(raw_body, content_encoding).decode('utf-8')

        print("✓ Successfully fetched NIST constants with browser headers")
        return nist_data

    except urllib.error.HTTPError as e:
        print(f"❌ HTTP Error {e.code}: {e.reason}")
        print(f" URL: {nist_url}")
        print(" This might be a temporary server issue or blocking policy")
        return None
    except urllib.error.URLError as e:
        print(f"❌ URL Error: {e.reason}")
        return None
    except Exception as e:
        # Best-effort boundary: any other failure (bad compression, bad
        # encoding, socket timeout) is reported and signalled with None.
        print(f"❌ Unexpected error fetching NIST data: {e}")
        return None


def test_local_parsing_with_dots() -> bool:
    """
    Test parsing with examples that include placeholder dots.

    Returns:
        True only if every sample line is split into columns and its value
        column converts to a float.
    """
    print("Testing local parsing with dot-containing examples...")
    print("=" * 60)

    # Include examples with dots. Columns are separated by 2+ spaces; the
    # single spaces inside a value are digit-group separators.
    test_lines = [
        "Planck constant                  6.626 070 15 e-34         (exact)                    J Hz^-1",
        "reduced Planck constant          1.054 571 817... e-34     (exact)                    J s",
        "speed of light in vacuum         299 792 458               (exact)                    m s^-1",
        "elementary charge                1.602 176 634 e-19        (exact)                    C",
        "electron mass                    9.109 383 701 5 e-31      0.000 000 000 28 e-31      kg",
        "fine-structure constant          7.297 352 566... e-3      0.000 000 001 1 e-3",
    ]

    success_count = 0

    for line in test_lines:
        print(f"\nInput: {line}")

        # Split by 2+ spaces
        sections = _COLUMN_SEPARATOR_RE.split(line)
        print(f"Sections: {sections}")

        # Extract parts
        if len(sections) < 2:
            # Report instead of silently skipping so the failure is visible.
            print(" ❌ Could not split line into quantity and value columns")
            continue

        quantity = sections[0].strip()
        value_part = sections[1].strip()

        print(f" Quantity: '{quantity}'")
        print(f" Raw value: '{value_part}'")

        try:
            # Clean the value using our improved function
            clean_value = clean_nist_value(value_part)
            float_val = float(clean_value)
        except ValueError as e:
            print(f" ❌ Conversion failed: {e}")
            continue

        print(f" Clean value: '{clean_value}'")
        print(f" ✓ Converted: {float_val:.3e}")
        success_count += 1

    print(f"\nSuccess rate: {success_count}/{len(test_lines)} ({success_count/len(test_lines)*100:.1f}%)")
    return success_count == len(test_lines)


def test_real_nist_fetch() -> bool:
    """
    Test fetching real NIST data with proper headers.

    Downloads the table, then for each target constant scans for a line whose
    text contains the target name and whose value parses into the expected
    range.

    Returns:
        False if the download failed; otherwise True when at least 80% of
        the target constants were found with an in-range value.
    """
    print("\n" + "=" * 60)
    print("Testing real NIST fetch with browser headers...")

    nist_data = fetch_nist_with_proper_headers()

    if not nist_data:
        print("❌ Could not fetch NIST data")
        return False

    lines = nist_data.split('\n')
    print(f"✓ Fetched {len(lines)} lines from NIST")

    # Look for specific constants: (name fragment, min value, max value)
    target_constants = [
        ("Planck constant", 6e-34, 7e-34),
        ("reduced Planck constant", 1e-34, 1.1e-34),
        ("electron mass", 9e-31, 9.2e-31),
        ("elementary charge", 1.6e-19, 1.7e-19),
        ("speed of light", 2.9e8, 3.1e8),
        ("Bohr radius", 5e-11, 6e-11),
        ("fine-structure constant", 7e-3, 8e-3),
    ]

    print("\nSearching for target constants...")
    found_count = 0

    for target, min_val, max_val in target_constants:
        print(f"\nLooking for: {target}")
        target_lower = target.lower()
        found = False

        for line in lines:
            stripped = line.strip()
            if target_lower not in stripped.lower() or stripped.startswith('Quantity'):
                continue

            print(f" Found: {stripped}")

            # Test parsing this line
            sections = _COLUMN_SEPARATOR_RE.split(stripped)
            if len(sections) < 2:
                continue
            value_part = sections[1].strip()

            try:
                clean_value = clean_nist_value(value_part)
                float_val = float(clean_value)
            except ValueError as e:
                print(f" ❌ Parse failed: {e}")
                continue

            print(f" ✓ Parsed: '{value_part}' -> '{clean_value}' -> {float_val:.3e}")

            # Validate the value is in expected range
            if min_val <= float_val <= max_val:
                print(f" ✓ Value in expected range [{min_val:.1e}, {max_val:.1e}]")
                found = True
                found_count += 1
                break

            # The name is matched as a substring, so an unrelated row (e.g. a
            # mass *ratio*) can match first; keep scanning rather than give up.
            print(f" ⚠ Value outside expected range [{min_val:.1e}, {max_val:.1e}]")

        if not found:
            print(" ❌ Not found or couldn't parse correctly")

    print(f"\nSummary: Found {found_count}/{len(target_constants)} constants successfully")
    return found_count >= len(target_constants) * 0.8  # 80% success rate


# These are script-driven checks that return a bool to main(); they are not
# pytest test cases (and one of them needs network access), so opt them out
# of pytest collection despite their ``test_`` prefix.
test_local_parsing_with_dots.__test__ = False
test_real_nist_fetch.__test__ = False


def main():
    """Run the local parsing check and the live fetch check, then print a summary."""
    print("IMPROVED NIST CONSTANT PARSER TEST")
    print("=" * 60)

    # Test 1: Local parsing with dots
    print("TEST 1: Local parsing with improved dot handling")
    local_success = test_local_parsing_with_dots()

    # Test 2: Real NIST fetch with proper headers
    print("\nTEST 2: Real NIST fetch with browser headers")
    fetch_success = test_real_nist_fetch()

    # Summary
    print("\n" + "=" * 60)
    print("FINAL RESULTS:")
    print(f" Local parsing: {'✓ PASS' if local_success else '❌ FAIL'}")
    print(f" NIST fetching: {'✓ PASS' if fetch_success else '❌ FAIL'}")

    if local_success and fetch_success:
        print("\n🎉 All tests passed! The parser should work correctly now.")
        print("You can now run the enhanced precision verification script.")
    else:
        print("\n⚠ Some tests failed. Check the issues above.")
        if not local_success:
            print(" - Local parsing needs more work")
        if not fetch_success:
            print(" - NIST fetching still has issues (may need to use fallback constants)")


if __name__ == "__main__":
    main()