spin_paper/scripts/statistical_tests.py

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from astropy.io import fits
import pandas as pd
from astropy.coordinates import SkyCoord
from astropy import units as u

# ========================================
# TEST 1: COSMICFLOWS-4 STATISTICAL ANALYSIS
# ========================================

def test_cosmic_flows_sigma(velocity_data):
    """
    Test for excess centripetal acceleration in cosmic flows
    using your Cosmicflows-4 data
    """
    print("=== COSMICFLOWS-4 SIGMA TEST ===")

    # Define major attractors (SGX, SGY, SGZ coordinates in Mpc)
    attractors = {
        'Great_Attractor': {'pos': (-60, 0, 40), 'mass': 5e15},  # M_sun
        'Shapley': {'pos': (-140, 60, -16), 'mass': 1e16},
        'Perseus_Pisces': {'pos': (100, -20, 50), 'mass': 3e15}
    }

    # Your coordinate grid from the script
    N = 64
    L = 200.0
    coords = np.linspace(-L, L, N)
    X, Y, Z = np.meshgrid(coords, coords, coords, indexing='ij')

    # Extract velocity components (already in km/s)
    vx, vy, vz = velocity_data[0], velocity_data[1], velocity_data[2]

    sigma_eff_results = []

    for name, attractor in attractors.items():
        # Distance from attractor
        ax, ay, az = attractor['pos']
        r = np.sqrt((X - ax)**2 + (Y - ay)**2 + (Z - az)**2)

        # Radial velocity toward attractor
        r_hat_x = (ax - X) / r
        r_hat_y = (ay - Y) / r
        r_hat_z = (az - Z) / r
        v_radial = vx * r_hat_x + vy * r_hat_y + vz * r_hat_z

        # Test at different radii
        radii = np.array([20, 40, 60, 80, 100])  # Mpc

        for R in radii:
            # Select shell around attractor
            mask = (r > R-10) & (r < R+10) & (r > 10)  # 20 Mpc shell, avoid center

            if np.sum(mask) < 10:  # Need minimum points
                continue

            v_flow = np.abs(v_radial[mask])  # Infall speed
            r_shell = r[mask]

            # Observed centripetal acceleration
            a_obs = np.mean(v_flow**2 / (r_shell * 3.086e19))  # Convert Mpc to m

            # Expected gravitational acceleration
            G = 6.67e-11  # m^3 kg^-1 s^-2
            M_sun = 1.989e30  # kg
            a_grav = G * attractor['mass'] * M_sun / (R * 3.086e22)**2

            # Effective sigma
            sigma_eff = a_obs - a_grav

            # Statistical uncertainty (rough estimate)
            sigma_err = np.std(v_flow**2 / (r_shell * 3.086e19)) / np.sqrt(len(v_flow))

            sigma_eff_results.append({
                'attractor': name,
                'radius_Mpc': R,
                'sigma_eff': sigma_eff,
                'sigma_err': sigma_err,
                'n_points': np.sum(mask)
            })

            print(f"{name} at {R} Mpc: σ_eff = {sigma_eff:.2e} ± {sigma_err:.2e} m/s²")

    # Statistical test: Are all sigma_eff consistent with zero?
    sigma_values = [r['sigma_eff'] for r in sigma_eff_results]
    sigma_errors = [r['sigma_err'] for r in sigma_eff_results]

    # Weighted mean
    weights = 1 / np.array(sigma_errors)**2
    weighted_mean = np.sum(np.array(sigma_values) * weights) / np.sum(weights)
    weighted_err = 1 / np.sqrt(np.sum(weights))

    # Chi-squared test for consistency with zero
    chi2 = np.sum((np.array(sigma_values) / np.array(sigma_errors))**2)
    dof = len(sigma_values)
    p_value = 1 - stats.chi2.cdf(chi2, dof)

    print(f"\n=== STATISTICAL RESULTS ===")
    print(f"Weighted mean σ_eff: {weighted_mean:.2e} ± {weighted_err:.2e} m/s²")
    print(f"Chi-squared: {chi2:.2f} (dof={dof})")
    print(f"P-value for null hypothesis (σ=0): {p_value:.3f}")

    if p_value < 0.05:
        print("❌ REJECT null hypothesis - Evidence for non-zero σ!")
    else:
        print("✅ ACCEPT null hypothesis - No evidence for σ ≠ 0")

    # Upper limit (95% confidence)
    upper_limit = weighted_mean + 1.96 * weighted_err
    print(f"95% upper limit: σ < {upper_limit:.2e} m/s²")

    return sigma_eff_results

# ========================================
# TEST 2: GAIA STELLAR CLUSTERS
# ========================================

def test_stellar_clusters():
    """
    Test velocity dispersion excess in stellar clusters using Gaia data
    """
    print("\n=== STELLAR CLUSTER TEST ===")

    # Known open clusters with Gaia measurements
    clusters = pd.DataFrame({
        'name': ['Hyades', 'Pleiades', 'Praesepe', 'Coma_Ber', 'Alpha_Per', 'IC_2391'],
        'distance_pc': [47, 136, 184, 88, 172, 148],
        'tidal_radius_pc': [10, 15, 12, 7, 18, 8],
        'stellar_mass_Msun': [400, 800, 600, 200, 1200, 300],
        'observed_sigma_kms': [0.50, 0.65, 0.45, 0.35, 0.85, 0.40],
        'observed_sigma_err': [0.08, 0.12, 0.09, 0.08, 0.15, 0.10]
    })

    # Calculate predicted virial velocity dispersion
    G = 6.67e-11  # m^3 kg^-1 s^-2
    M_sun = 1.989e30  # kg
    pc_to_m = 3.086e16  # m

    clusters['predicted_sigma_kms'] = np.sqrt(
        G * clusters['stellar_mass_Msun'] * M_sun /
        (clusters['tidal_radius_pc'] * pc_to_m)
    ) / 1000  # Convert to km/s

    # Spin-tether prediction
    sigma_ST = 3e-13  # m/s^2 (your predicted value)
    clusters['ST_excess_kms'] = np.sqrt(
        (2 * sigma_ST * clusters['tidal_radius_pc'] * pc_to_m) / 3
    ) / 1000

    clusters['ST_predicted_sigma'] = np.sqrt(
        clusters['predicted_sigma_kms']**2 + clusters['ST_excess_kms']**2
    )

    # Statistical test
    print("Cluster Analysis:")
    print("-" * 60)
    for i, row in clusters.iterrows():
        excess_obs = row['observed_sigma_kms'] - row['predicted_sigma_kms']
        excess_pred = row['ST_excess_kms']
        significance = abs(excess_obs - excess_pred) / row['observed_sigma_err']

        print(f"{row['name']:12} | Obs: {row['observed_sigma_kms']:.2f} | "
              f"Vir: {row['predicted_sigma_kms']:.2f} | "
              f"ST Pred: {row['ST_predicted_sigma']:.2f} | "
              f"Signif: {significance:.1f}σ")

    # Correlation test: Does excess correlate with size (not mass)?
    excess_observed = clusters['observed_sigma_kms'] - clusters['predicted_sigma_kms']

    # Correlation with tidal radius (predicted by spin-tether)
    corr_radius, p_radius = stats.pearsonr(clusters['tidal_radius_pc'], excess_observed)

    # Correlation with mass (NOT predicted by spin-tether)
    corr_mass, p_mass = stats.pearsonr(clusters['stellar_mass_Msun'], excess_observed)

    print(f"\nCorrelation Analysis:")
    print(f"Excess vs Radius: r = {corr_radius:.3f}, p = {p_radius:.3f}")
    print(f"Excess vs Mass:   r = {corr_mass:.3f}, p = {p_mass:.3f}")

    if p_radius < 0.05 and abs(corr_radius) > abs(corr_mass):
        print("✅ SUPPORTS spin-tether: Excess correlates with SIZE not MASS")
    else:
        print("❌ Does NOT support spin-tether scaling")

    # Chi-squared test for model fit
    chi2_null = np.sum(((clusters['observed_sigma_kms'] - clusters['predicted_sigma_kms']) /
                       clusters['observed_sigma_err'])**2)
    chi2_ST = np.sum(((clusters['observed_sigma_kms'] - clusters['ST_predicted_sigma']) /
                     clusters['observed_sigma_err'])**2)

    dof = len(clusters) - 1
    p_null = 1 - stats.chi2.cdf(chi2_null, dof)
    p_ST = 1 - stats.chi2.cdf(chi2_ST, dof)

    print(f"\nModel Comparison:")
    print(f"Pure virial:     χ² = {chi2_null:.2f}, p = {p_null:.3f}")
    print(f"Spin-tether:     χ² = {chi2_ST:.2f}, p = {p_ST:.3f}")

    if chi2_ST < chi2_null:
        print("✅ Spin-tether model fits better")
    else:
        print("❌ Virial model fits better")

    return clusters

# ========================================
# TEST 3: DWARF GALAXY SCALING
# ========================================

def test_dwarf_galaxies():
    """
    Test dwarf galaxy velocity dispersions vs half-light radius
    """
    print("\n=== DWARF GALAXY TEST ===")

    # Compilation of dwarf galaxy data from literature
    dwarfs = pd.DataFrame({
        'name': ['Draco', 'Ursa_Minor', 'Sextans', 'Carina', 'Fornax',
                'Leo_I', 'Leo_II', 'Sculptor', 'Segue_1', 'Willman_1'],
        'half_light_radius_pc': [200, 180, 460, 250, 710, 250, 150, 300, 30, 25],
        'velocity_dispersion_kms': [9.1, 9.5, 7.9, 6.6, 11.7, 9.2, 6.7, 9.2, 3.9, 4.3],
        'vel_disp_error_kms': [1.2, 1.2, 1.8, 1.2, 0.8, 2.4, 1.4, 1.1, 1.2, 2.3],
        'stellar_mass_Msun': [2e5, 3e5, 4e5, 4e5, 2e7, 5e6, 7e5, 2e6, 3e2, 1e3]
    })

    # Predicted gravitational component (rough estimate)
    G = 6.67e-11
    M_sun = 1.989e30
    pc_to_m = 3.086e16

    # Assume dark matter scaling: M_DM ≈ 100 * M_stellar for ultra-faints
    dwarfs['total_mass_Msun'] = np.where(dwarfs['stellar_mass_Msun'] < 1e6,
                                        100 * dwarfs['stellar_mass_Msun'],
                                        50 * dwarfs['stellar_mass_Msun'])

    dwarfs['sigma_grav_kms'] = np.sqrt(
        G * dwarfs['total_mass_Msun'] * M_sun /
        (dwarfs['half_light_radius_pc'] * pc_to_m)
    ) / 1000

    # Spin-tether prediction: σ² = σ_grav² + σ_ST * r_half
    sigma_ST = 3e-13  # m/s^2
    dwarfs['sigma_ST_component'] = np.sqrt(
        sigma_ST * dwarfs['half_light_radius_pc'] * pc_to_m
    ) / 1000

    dwarfs['sigma_total_pred'] = np.sqrt(
        dwarfs['sigma_grav_kms']**2 + dwarfs['sigma_ST_component']**2
    )

    # Test the scaling
    print("Dwarf Galaxy Analysis:")
    print("-" * 70)
    for i, row in dwarfs.iterrows():
        residual = row['velocity_dispersion_kms'] - row['sigma_total_pred']
        significance = abs(residual) / row['vel_disp_error_kms']

        print(f"{row['name']:12} | Obs: {row['velocity_dispersion_kms']:4.1f} | "
              f"Pred: {row['sigma_total_pred']:4.1f} | "
              f"Resid: {residual:+4.1f} | {significance:.1f}σ")

    # Statistical tests
    residuals = dwarfs['velocity_dispersion_kms'] - dwarfs['sigma_total_pred']
    errors = dwarfs['vel_disp_error_kms']

    # Are residuals consistent with zero?
    chi2 = np.sum((residuals / errors)**2)
    dof = len(dwarfs) - 1
    p_value = 1 - stats.chi2.cdf(chi2, dof)

    print(f"\nStatistical Results:")
    print(f"χ² = {chi2:.2f} (dof = {dof})")
    print(f"P-value: {p_value:.3f}")

    if p_value > 0.05:
        print("✅ Residuals consistent with model")
    else:
        print("❌ Significant deviations from model")

    # Test scaling with radius
    corr_coeff, p_corr = stats.pearsonr(dwarfs['half_light_radius_pc'],
                                       dwarfs['velocity_dispersion_kms'])
    print(f"Correlation (σ vs r_half): r = {corr_coeff:.3f}, p = {p_corr:.3f}")

    return dwarfs

# ========================================
# MAIN ANALYSIS FUNCTION
# ========================================

def run_all_tests(cf4_velocity_data=None):
    """
    Run all statistical tests to validate/falsify spin-tether theory
    """
    print("SPIN-TETHER THEORY: STATISTICAL VALIDATION")
    print("=" * 50)

    # Test 1: Cosmic flows (if data available)
    if cf4_velocity_data is not None:
        cf4_results = test_cosmic_flows_sigma(cf4_velocity_data)
    else:
        print("CF4 data not provided - skipping cosmic flow test")
        cf4_results = None

    # Test 2: Stellar clusters
    cluster_results = test_stellar_clusters()

    # Test 3: Dwarf galaxies
    dwarf_results = test_dwarf_galaxies()

    print("\n" + "=" * 50)
    print("FINAL VERDICT:")
    print("=" * 50)

    # Summary logic here would depend on results
    # This is where you'd implement your decision criteria

    return {
        'cosmic_flows': cf4_results,
        'clusters': cluster_results,
        'dwarfs': dwarf_results
    }

# ========================================
# USAGE EXAMPLE
# ========================================

if __name__ == "__main__":
    # Load your CF4 data if available
    try:
        filename = "cosmic_data/CF4gp_new_64-z008_velocity.fits"
        with fits.open(filename) as hdul:
            velocity_data = hdul[0].data * 52.0  # Convert to km/s

        results = run_all_tests(velocity_data)
    except FileNotFoundError:
        print("CF4 data file not found - running tests without cosmic flow data")
        results = run_all_tests()

    # Additional plotting/analysis code would go here