#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Launch Capacity Upper Bound Analysis - Rigorous Methodology

This analysis uses a two-pronged approach:
1. DATA-DRIVEN: Unconstrained Richards model fitting to historical data
2. PHYSICS-BASED: Independent derivation from launch site constraints

The convergence of both approaches provides robust justification for
the 1 launch/site/day assumption.

Methodology:
- Step 1: Fit Richards model WITHOUT constraining K (let data speak)
- Step 2: Derive physical upper bound from site capacity analysis
- Step 3: Compare data-driven K with physics-based limit
- Step 4: If they converge, the conclusion is well-supported
"""

import pandas as pd
import numpy as np
from scipy.optimize import curve_fit
from scipy.stats import pearsonr
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

plt.rcParams["font.sans-serif"] = ["Arial Unicode MS", "SimHei", "DejaVu Sans"]
plt.rcParams["axes.unicode_minus"] = False


# ============================================================
# 1. Richards Growth Model
# ============================================================

def richards(t, K, r, t0, v):
    """
    Richards curve (generalized logistic model)
    
    N(t) = K / (1 + exp(-r*(t - t0)))^(1/v)
    
    Parameters:
        K  : Carrying capacity (saturation limit) - TO BE DETERMINED BY DATA
        r  : Growth rate
        t0 : Inflection point time
        v  : Shape parameter (v=1 → standard logistic)
    
    Selection rationale:
    - More flexible than Logistic (symmetric) or Gompertz (fixed asymmetry)
    - Shape parameter v captures technology adoption patterns
    - Widely used in technology forecasting literature
    """
    exp_term = np.exp(-r * (t - t0))
    exp_term = np.clip(exp_term, 1e-10, 1e10)
    return K / np.power(1 + exp_term, 1/v)


# ============================================================
# 2. Data Loading
# ============================================================

def load_data(filepath="rocket_launch_counts.csv"):
    """Load and preprocess rocket launch data"""
    df = pd.read_csv(filepath)
    df = df.rename(columns={"YDate": "year", "Total": "launches"})
    df["year"] = pd.to_numeric(df["year"], errors="coerce")
    df["launches"] = pd.to_numeric(df["launches"], errors="coerce")
    df = df.dropna(subset=["year", "launches"])
    df = df[(df["year"] >= 1957) & (df["year"] <= 2025)]
    df = df.astype({"year": int, "launches": int})
    df = df.sort_values("year").reset_index(drop=True)
    return df


# ============================================================
# 3. Model Fitting - UNCONSTRAINED (Data-Driven)
# ============================================================

def fit_richards_unconstrained(data, base_year=1957):
    """
    Fit Richards model WITHOUT artificial constraints on K.
    
    This allows the data to determine the saturation limit naturally.
    K upper bound is set very high (100,000) to not artificially limit results.
    """
    years = data["year"].values
    launches = data["launches"].values
    t = (years - base_year).astype(float)
    
    # Initial parameters (reasonable starting point)
    p0 = [5000.0, 0.08, 80.0, 2.0]
    
    # UNCONSTRAINED bounds: K allowed to range from 500 to 100,000
    # This wide range ensures data determines K, not artificial limits
    bounds = ([500, 0.005, 10, 0.2], [100000, 1.0, 200, 10.0])
    
    try:
        popt, pcov = curve_fit(richards, t, launches, p0=p0, bounds=bounds, maxfev=100000)
        perr = np.sqrt(np.diag(pcov))
        y_pred = richards(t, *popt)
        
        # Goodness of fit metrics
        ss_res = np.sum((launches - y_pred) ** 2)
        ss_tot = np.sum((launches - np.mean(launches)) ** 2)
        r_squared = 1 - (ss_res / ss_tot)
        rmse = np.sqrt(np.mean((launches - y_pred) ** 2))
        
        # AIC/BIC for model selection
        n = len(launches)
        k_params = 4  # Richards has 4 parameters
        aic = n * np.log(ss_res/n) + 2 * k_params
        bic = n * np.log(ss_res/n) + k_params * np.log(n)
        
        K, r, t0, v = popt
        K_err, r_err, t0_err, v_err = perr
        
        return {
            "success": True,
            "params": popt,
            "param_err": perr,
            "K": K,
            "K_err": K_err,
            "r": r,
            "t0": t0,
            "v": v,
            "inflection_year": base_year + t0,
            "inflection_value": K / (v + 1) ** (1/v),
            "r_squared": r_squared,
            "rmse": rmse,
            "aic": aic,
            "bic": bic,
            "y_pred": y_pred,
            "years": years,
            "launches": launches,
            "t": t,
            "base_year": base_year,
        }
    except Exception as e:
        return {"success": False, "error": str(e)}


def bootstrap_K_estimate(data, n_bootstrap=1000, base_year=1957):
    """
    Bootstrap analysis for K uncertainty estimation (UNCONSTRAINED).
    
    This provides confidence intervals for the data-driven K estimate
    without imposing physical constraints.
    """
    years = data["year"].values
    launches = data["launches"].values
    t = (years - base_year).astype(float)
    n = len(t)
    
    p0 = [5000.0, 0.08, 80.0, 2.0]
    bounds = ([500, 0.005, 10, 0.2], [100000, 1.0, 200, 10.0])
    
    K_samples = []
    for _ in range(n_bootstrap):
        idx = np.random.choice(n, n, replace=True)
        t_boot = t[idx]
        y_boot = launches[idx]
        
        try:
            popt, _ = curve_fit(richards, t_boot, y_boot, p0=p0, bounds=bounds, maxfev=20000)
            K_samples.append(popt[0])
        except:
            pass
    
    if len(K_samples) > 100:
        return {
            "mean": np.mean(K_samples),
            "median": np.median(K_samples),
            "std": np.std(K_samples),
            "ci_5": np.percentile(K_samples, 5),
            "ci_25": np.percentile(K_samples, 25),
            "ci_75": np.percentile(K_samples, 75),
            "ci_95": np.percentile(K_samples, 95),
            "n_valid": len(K_samples),
            "samples": K_samples,
        }
    return None


# ============================================================
# 4. Physical Constraint Analysis (Independent Derivation)
# ============================================================

def derive_physical_limits():
    """
    Derive launch capacity limits from physical/operational constraints.
    
    This is INDEPENDENT of the statistical model - based purely on
    engineering and operational considerations.
    """
    
    # Number of major launch sites (problem specifies 10)
    n_sites = 10
    
    # Per-site capacity analysis
    constraints = {
        "pad_turnaround": {
            "description": "Launch pad turnaround time",
            "min_hours": 24,  # Minimum for rapid-reuse (SpaceX target)
            "typical_hours": 48,  # Current state-of-art
            "conservative_hours": 168,  # Weekly launches
        },
        "range_safety": {
            "description": "Range safety clearance",
            "note": "Adjacent pads cannot launch simultaneously",
        },
        "weather_windows": {
            "description": "Weather constraints",
            "usable_days_fraction": 0.7,  # ~70% of days have acceptable weather
        },
        "trajectory_conflicts": {
            "description": "Orbital slot and trajectory coordination",
            "note": "Multiple launches per day require careful scheduling",
        },
    }
    
    # Derive per-site daily capacity
    scenarios = {
        "Conservative (current tech)": {
            "launches_per_site_per_day": 1/7,  # Weekly launches
            "justification": "Current average for most sites",
        },
        "Moderate (near-term)": {
            "launches_per_site_per_day": 1/3,  # Every 3 days
            "justification": "Achievable with infrastructure investment",
        },
        "Optimistic (SpaceX-level)": {
            "launches_per_site_per_day": 1/1.5,  # Every 36 hours
            "justification": "Demonstrated by SpaceX at peak",
        },
        "Theoretical Maximum": {
            "launches_per_site_per_day": 1.0,  # Daily launches
            "justification": "Physical limit with 24h turnaround",
        },
    }
    
    # Calculate annual capacities
    results = {}
    for scenario_name, params in scenarios.items():
        daily_rate = params["launches_per_site_per_day"]
        annual_per_site = daily_rate * 365
        total_annual = annual_per_site * n_sites
        
        results[scenario_name] = {
            "daily_per_site": daily_rate,
            "annual_per_site": annual_per_site,
            "total_annual": total_annual,
            "justification": params["justification"],
        }
    
    return {
        "n_sites": n_sites,
        "constraints": constraints,
        "scenarios": results,
        "theoretical_max": n_sites * 365,  # 3650
    }


# ============================================================
# 5. Convergence Analysis
# ============================================================

def analyze_convergence(data_driven_K, bootstrap_results, physical_limits):
    """
    Compare data-driven K estimate with physics-based limits.
    
    If they converge, the conclusion is well-supported.
    """
    
    physical_max = physical_limits["theoretical_max"]  # 3650
    
    # Data-driven estimate
    K_data = data_driven_K
    K_ci_low = bootstrap_results["ci_5"] if bootstrap_results else K_data * 0.8
    K_ci_high = bootstrap_results["ci_95"] if bootstrap_results else K_data * 1.2
    
    # Convergence analysis
    analysis = {
        "data_driven_K": K_data,
        "data_driven_CI": (K_ci_low, K_ci_high),
        "physical_max": physical_max,
        "ratio": K_data / physical_max,
        "converged": K_ci_low <= physical_max <= K_ci_high * 1.5,  # Within range
    }
    
    # Interpretation
    if K_data < physical_max * 0.5:
        analysis["interpretation"] = "Data suggests saturation well below physical limit"
        analysis["recommendation"] = "Use data-driven K as conservative estimate"
    elif K_data < physical_max * 1.2:
        analysis["interpretation"] = "Data-driven K converges with physical maximum"
        analysis["recommendation"] = "Physical limit (1/site/day) is well-supported"
    else:
        analysis["interpretation"] = "Data suggests potential beyond current sites"
        analysis["recommendation"] = "May need to consider additional sites or technology"
    
    return analysis


# ============================================================
# 6. Visualization
# ============================================================

def plot_comprehensive_analysis(results_dict, physical_limits, convergence, 
                                save_path="launch_capacity_analysis.png"):
    """Generate comprehensive analysis plot with dual validation"""
    
    fig, axes = plt.subplots(2, 2, figsize=(15, 12))
    
    colors = {
        "data": "#34495E",
        "model": "#27AE60",
        "physical": "#E74C3C",
        "ci": "#3498DB",
    }
    
    # ========== Plot 1: Model Fit (Unconstrained) ==========
    ax1 = axes[0, 0]
    
    res = results_dict["近15年"]
    if res["success"]:
        years = res["years"]
        launches = res["launches"]
        base_year = res["base_year"]
        
        # Historical data
        ax1.scatter(years, launches, color=colors["data"], s=60, alpha=0.8,
                   label="Historical Data (2010-2025)", zorder=3)
        
        # Model prediction
        years_proj = np.arange(2010, 2101)
        t_proj = years_proj - base_year
        pred = richards(t_proj, *res["params"])
        
        ax1.plot(years_proj, pred, color=colors["model"], lw=2.5,
                label=f'Richards Model (K={res["K"]:.0f}, R²={res["r_squared"]:.3f})')
        
        # Physical limit
        phys_max = physical_limits["theoretical_max"]
        ax1.axhline(phys_max, color=colors["physical"], ls='--', lw=2,
                   label=f'Physical Limit: {phys_max} (1/site/day)')
        
        # Bootstrap CI band
        boot = results_dict.get("bootstrap")
        if boot:
            # Show where K falls
            ax1.axhline(boot["ci_5"], color=colors["ci"], ls=':', lw=1.5, alpha=0.7)
            ax1.axhline(boot["ci_95"], color=colors["ci"], ls=':', lw=1.5, alpha=0.7,
                       label=f'90% CI: [{boot["ci_5"]:.0f}, {boot["ci_95"]:.0f}]')
        
        ax1.axvline(2025, color="gray", ls=":", lw=1, alpha=0.7)
        ax1.axvline(2050, color="blue", ls=":", lw=1.5, alpha=0.7)
        ax1.text(2052, 500, "2050", fontsize=10, color="blue")
        
        ax1.set_xlabel("Year", fontsize=11)
        ax1.set_ylabel("Annual Launches", fontsize=11)
        ax1.set_title("Step 1: Unconstrained Richards Model Fit\n(K determined by data, not preset)", fontsize=12)
        ax1.legend(loc="upper left", fontsize=9)
        ax1.grid(True, alpha=0.3)
        ax1.set_xlim(2008, 2100)
        ax1.set_ylim(0, max(res["K"] * 1.1, phys_max * 1.2))
    
    # ========== Plot 2: Bootstrap K Distribution ==========
    ax2 = axes[0, 1]
    
    boot = results_dict.get("bootstrap")
    if boot and "samples" in boot:
        samples = boot["samples"]
        
        # Histogram
        ax2.hist(samples, bins=40, color=colors["ci"], alpha=0.7, edgecolor='white',
                label=f'Bootstrap samples (n={boot["n_valid"]})')
        
        # Mark statistics
        ax2.axvline(boot["mean"], color=colors["model"], lw=2, ls='-',
                   label=f'Mean K = {boot["mean"]:.0f}')
        ax2.axvline(boot["ci_5"], color='orange', lw=2, ls='--',
                   label=f'5th percentile = {boot["ci_5"]:.0f}')
        ax2.axvline(boot["ci_95"], color='orange', lw=2, ls='--',
                   label=f'95th percentile = {boot["ci_95"]:.0f}')
        
        # Physical limit
        ax2.axvline(physical_limits["theoretical_max"], color=colors["physical"], 
                   lw=2.5, ls='-', label=f'Physical max = {physical_limits["theoretical_max"]}')
        
        ax2.set_xlabel("Carrying Capacity K (launches/year)", fontsize=11)
        ax2.set_ylabel("Frequency", fontsize=11)
        ax2.set_title("Step 2: Bootstrap Distribution of K\n(Uncertainty quantification)", fontsize=12)
        ax2.legend(loc="upper right", fontsize=9)
        ax2.grid(True, alpha=0.3)
    
    # ========== Plot 3: Physical Constraint Analysis ==========
    ax3 = axes[1, 0]
    
    scenarios = physical_limits["scenarios"]
    names = list(scenarios.keys())
    values = [scenarios[n]["total_annual"] for n in names]
    
    y_pos = np.arange(len(names))
    bars = ax3.barh(y_pos, values, color=colors["physical"], alpha=0.7, edgecolor='black')
    
    # Data-driven K
    data_K = results_dict["近15年"]["K"]
    ax3.axvline(data_K, color=colors["model"], lw=3, ls='-',
               label=f'Data-driven K = {data_K:.0f}')
    
    # Mark values
    for bar, val in zip(bars, values):
        ax3.text(val + 100, bar.get_y() + bar.get_height()/2,
                f'{val:.0f}', va='center', fontsize=10, fontweight='bold')
    
    ax3.set_yticks(y_pos)
    ax3.set_yticklabels(names, fontsize=9)
    ax3.set_xlabel("Annual Launch Capacity (10 sites)", fontsize=11)
    ax3.set_title("Step 3: Physics-Based Capacity Analysis\n(Independent of statistical model)", fontsize=12)
    ax3.legend(loc="lower right", fontsize=10)
    ax3.grid(True, alpha=0.3, axis='x')
    ax3.set_xlim(0, max(values) * 1.3)
    
    # ========== Plot 4: Convergence Summary ==========
    ax4 = axes[1, 1]
    ax4.axis('off')
    
    # Summary text
    boot_text = f"{boot['mean']:.0f} ± {boot['std']:.0f}" if boot else "N/A"
    ci_text = f"[{boot['ci_5']:.0f}, {boot['ci_95']:.0f}]" if boot else "N/A"
    
    summary = f"""
    ┌──────────────────────────────────────────────────────────────────────┐
    │                    DUAL VALIDATION SUMMARY                           │
    ├──────────────────────────────────────────────────────────────────────┤
    │                                                                      │
    │  STEP 1: DATA-DRIVEN ANALYSIS (Richards Model)                       │
    │  ─────────────────────────────────────────────                       │
    │  • Model: N(t) = K / (1 + exp(-r(t-t₀)))^(1/v)                       │
    │  • Fitting method: Nonlinear least squares (unconstrained)           │
    │  • Result: K = {results_dict['近15年']['K']:.0f} launches/year                           │
    │  • Goodness of fit: R² = {results_dict['近15年']['r_squared']:.4f}                             │
    │  • Bootstrap 90% CI: {ci_text:<25}                │
    │                                                                      │
    │  STEP 2: PHYSICS-BASED ANALYSIS                                      │
    │  ──────────────────────────────                                      │
    │  • Number of major launch sites: {physical_limits['n_sites']}                              │
    │  • Theoretical maximum (1/site/day): {physical_limits['theoretical_max']} launches/year       │
    │  • Key constraints: pad turnaround, range safety, weather            │
    │                                                                      │
    │  STEP 3: CONVERGENCE CHECK                                           │
    │  ─────────────────────────                                           │
    │  • Data-driven K / Physical max = {convergence['ratio']:.2f}                          │
    │  • Interpretation: {convergence['interpretation']:<30} │
    │                                                                      │
    │  CONCLUSION                                                          │
    │  ──────────                                                          │
    │  The unconstrained data-driven model yields K ≈ {results_dict['近15年']['K']:.0f},              │
    │  which {"converges with" if convergence['converged'] else "differs from"} the physics-based limit of {physical_limits['theoretical_max']}.             │
    │                                                                      │
    │  This {"SUPPORTS" if convergence['converged'] else "DOES NOT SUPPORT"} the assumption of 1 launch/site/day as the       │
    │  upper bound for rocket launch frequency.                            │
    │                                                                      │
    └──────────────────────────────────────────────────────────────────────┘
    """
    
    ax4.text(0.02, 0.98, summary, transform=ax4.transAxes,
            fontsize=10, verticalalignment='top', family='monospace',
            bbox=dict(boxstyle='round', facecolor='lightyellow', alpha=0.9))
    
    plt.suptitle("Launch Capacity Analysis: Data-Driven + Physics-Based Dual Validation", 
                fontsize=14, fontweight='bold', y=1.02)
    plt.tight_layout()
    plt.savefig(save_path, dpi=150, bbox_inches='tight')
    plt.close()
    print(f"Plot saved: {save_path}")


# ============================================================
# 7. Main Analysis
# ============================================================

def main():
    print("=" * 70)
    print("Launch Capacity Analysis - Dual Validation Methodology")
    print("=" * 70)
    print("""
    Methodology:
    1. DATA-DRIVEN: Fit Richards model WITHOUT constraining K
    2. PHYSICS-BASED: Derive limits from site capacity analysis
    3. CONVERGENCE: Compare both approaches for validation
    """)
    
    # Load data
    df = load_data()
    print(f"Data range: {df['year'].min()} - {df['year'].max()}")
    print(f"Data points: {len(df)}")
    print(f"\nRecent launch counts:")
    for _, row in df.tail(5).iterrows():
        print(f"  {int(row['year'])}: {int(row['launches'])} launches")
    
    # Define data windows
    windows = {
        "全部数据": df[df["year"] <= 2025].copy(),
        "近25年": df[(df["year"] >= 2000) & (df["year"] <= 2025)].copy(),
        "近15年": df[(df["year"] >= 2010) & (df["year"] <= 2025)].copy(),
        "近10年": df[(df["year"] >= 2015) & (df["year"] <= 2025)].copy(),
    }
    
    results_dict = {}
    
    # ========== STEP 1: Data-Driven Analysis ==========
    print("\n" + "=" * 70)
    print("STEP 1: DATA-DRIVEN ANALYSIS (Unconstrained Richards Model)")
    print("=" * 70)
    
    for window_name, data in windows.items():
        print(f"\n--- {window_name} ({data['year'].min()}-{data['year'].max()}, n={len(data)}) ---")
        
        result = fit_richards_unconstrained(data)
        results_dict[window_name] = result
        
        if result["success"]:
            print(f"  K (carrying capacity) = {result['K']:.0f} ± {result['K_err']:.0f}")
            print(f"  r (growth rate)       = {result['r']:.4f}")
            print(f"  t0 (inflection)       = {result['inflection_year']:.1f}")
            print(f"  v (shape)             = {result['v']:.3f}")
            print(f"  R²                    = {result['r_squared']:.4f}")
            print(f"  AIC                   = {result['aic']:.1f}")
    
    # Bootstrap for recent 15-year data
    print("\n--- Bootstrap Uncertainty (近15年, n=1000) ---")
    boot_result = bootstrap_K_estimate(windows["近15年"], n_bootstrap=1000)
    results_dict["bootstrap"] = boot_result
    
    if boot_result:
        print(f"  K mean   = {boot_result['mean']:.0f}")
        print(f"  K median = {boot_result['median']:.0f}")
        print(f"  K std    = {boot_result['std']:.0f}")
        print(f"  90% CI   = [{boot_result['ci_5']:.0f}, {boot_result['ci_95']:.0f}]")
    
    # ========== STEP 2: Physics-Based Analysis ==========
    print("\n" + "=" * 70)
    print("STEP 2: PHYSICS-BASED ANALYSIS")
    print("=" * 70)
    
    physical_limits = derive_physical_limits()
    
    print(f"\nNumber of major launch sites: {physical_limits['n_sites']}")
    print(f"\nPer-site capacity scenarios:")
    for name, params in physical_limits["scenarios"].items():
        print(f"  {name}:")
        print(f"    {params['daily_per_site']:.2f} launches/site/day")
        print(f"    → {params['total_annual']:.0f} launches/year (total)")
    
    print(f"\nTheoretical maximum (1/site/day): {physical_limits['theoretical_max']} launches/year")
    
    # ========== STEP 3: Convergence Analysis ==========
    print("\n" + "=" * 70)
    print("STEP 3: CONVERGENCE ANALYSIS")
    print("=" * 70)
    
    data_K = results_dict["近15年"]["K"]
    convergence = analyze_convergence(data_K, boot_result, physical_limits)
    
    print(f"\n  Data-driven K:    {convergence['data_driven_K']:.0f}")
    print(f"  Physical maximum: {convergence['physical_max']}")
    print(f"  Ratio:            {convergence['ratio']:.2f}")
    print(f"\n  Interpretation: {convergence['interpretation']}")
    print(f"  Recommendation: {convergence['recommendation']}")
    
    # ========== Generate Visualization ==========
    print("\n" + "=" * 70)
    print("Generating visualization...")
    print("=" * 70)
    
    plot_comprehensive_analysis(results_dict, physical_limits, convergence)
    
    # ========== Final Conclusion ==========
    print("\n" + "=" * 70)
    print("CONCLUSION")
    print("=" * 70)
    
    print(f"""
    1. DATA-DRIVEN RESULT:
       - Unconstrained Richards model yields K = {data_K:.0f} launches/year
       - Bootstrap 90% CI: [{boot_result['ci_5']:.0f}, {boot_result['ci_95']:.0f}]
    
    2. PHYSICS-BASED RESULT:
       - 10 sites × 365 days = {physical_limits['theoretical_max']} launches/year (theoretical max)
       - Based on: pad turnaround, range safety, weather constraints
    
    3. CONVERGENCE:
       - Data/Physics ratio = {convergence['ratio']:.2f}
       - {convergence['interpretation']}
    
    4. RECOMMENDATION:
       - {convergence['recommendation']}
       - For model: Adopt 1 launch/site/day = 365 launches/site/year as upper bound
    """)
    
    print("=" * 70)
    print("Analysis Complete!")
    print("=" * 70)
    
    return results_dict, physical_limits, convergence


if __name__ == "__main__":
    results, physical, convergence = main()