Files
2026_mcm_b/p1/launch_capacity_analysis.py
2026-02-02 16:07:12 +08:00

613 lines
24 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Launch Capacity Upper Bound Analysis - Rigorous Methodology
This analysis uses a two-pronged approach:
1. DATA-DRIVEN: Unconstrained Richards model fitting to historical data
2. PHYSICS-BASED: Independent derivation from launch site constraints
The convergence of both approaches provides robust justification for
the 1 launch/site/day assumption.
Methodology:
- Step 1: Fit Richards model WITHOUT constraining K (let data speak)
- Step 2: Derive physical upper bound from site capacity analysis
- Step 3: Compare data-driven K with physics-based limit
- Step 4: If they converge, the conclusion is well-supported
"""
import pandas as pd
import numpy as np
from scipy.optimize import curve_fit
from scipy.stats import pearsonr
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
plt.rcParams["font.sans-serif"] = ["Arial Unicode MS", "SimHei", "DejaVu Sans"]
plt.rcParams["axes.unicode_minus"] = False
# ============================================================
# 1. Richards Growth Model
# ============================================================
def richards(t, K, r, t0, v):
"""
Richards curve (generalized logistic model)
N(t) = K / (1 + exp(-r*(t - t0)))^(1/v)
Parameters:
K : Carrying capacity (saturation limit) - TO BE DETERMINED BY DATA
r : Growth rate
t0 : Inflection point time
v : Shape parameter (v=1 → standard logistic)
Selection rationale:
- More flexible than Logistic (symmetric) or Gompertz (fixed asymmetry)
- Shape parameter v captures technology adoption patterns
- Widely used in technology forecasting literature
"""
exp_term = np.exp(-r * (t - t0))
exp_term = np.clip(exp_term, 1e-10, 1e10)
return K / np.power(1 + exp_term, 1/v)
# ============================================================
# 2. Data Loading
# ============================================================
def load_data(filepath="rocket_launch_counts.csv"):
"""Load and preprocess rocket launch data"""
df = pd.read_csv(filepath)
df = df.rename(columns={"YDate": "year", "Total": "launches"})
df["year"] = pd.to_numeric(df["year"], errors="coerce")
df["launches"] = pd.to_numeric(df["launches"], errors="coerce")
df = df.dropna(subset=["year", "launches"])
df = df[(df["year"] >= 1957) & (df["year"] <= 2025)]
df = df.astype({"year": int, "launches": int})
df = df.sort_values("year").reset_index(drop=True)
return df
# ============================================================
# 3. Model Fitting - UNCONSTRAINED (Data-Driven)
# ============================================================
def fit_richards_unconstrained(data, base_year=1957):
"""
Fit Richards model WITHOUT artificial constraints on K.
This allows the data to determine the saturation limit naturally.
K upper bound is set very high (100,000) to not artificially limit results.
"""
years = data["year"].values
launches = data["launches"].values
t = (years - base_year).astype(float)
# Initial parameters (reasonable starting point)
p0 = [5000.0, 0.08, 80.0, 2.0]
# UNCONSTRAINED bounds: K allowed to range from 500 to 100,000
# This wide range ensures data determines K, not artificial limits
bounds = ([500, 0.005, 10, 0.2], [100000, 1.0, 200, 10.0])
try:
popt, pcov = curve_fit(richards, t, launches, p0=p0, bounds=bounds, maxfev=100000)
perr = np.sqrt(np.diag(pcov))
y_pred = richards(t, *popt)
# Goodness of fit metrics
ss_res = np.sum((launches - y_pred) ** 2)
ss_tot = np.sum((launches - np.mean(launches)) ** 2)
r_squared = 1 - (ss_res / ss_tot)
rmse = np.sqrt(np.mean((launches - y_pred) ** 2))
# AIC/BIC for model selection
n = len(launches)
k_params = 4 # Richards has 4 parameters
aic = n * np.log(ss_res/n) + 2 * k_params
bic = n * np.log(ss_res/n) + k_params * np.log(n)
K, r, t0, v = popt
K_err, r_err, t0_err, v_err = perr
return {
"success": True,
"params": popt,
"param_err": perr,
"K": K,
"K_err": K_err,
"r": r,
"t0": t0,
"v": v,
"inflection_year": base_year + t0,
"inflection_value": K / (v + 1) ** (1/v),
"r_squared": r_squared,
"rmse": rmse,
"aic": aic,
"bic": bic,
"y_pred": y_pred,
"years": years,
"launches": launches,
"t": t,
"base_year": base_year,
}
except Exception as e:
return {"success": False, "error": str(e)}
def bootstrap_K_estimate(data, n_bootstrap=1000, base_year=1957):
"""
Bootstrap analysis for K uncertainty estimation (UNCONSTRAINED).
This provides confidence intervals for the data-driven K estimate
without imposing physical constraints.
"""
years = data["year"].values
launches = data["launches"].values
t = (years - base_year).astype(float)
n = len(t)
p0 = [5000.0, 0.08, 80.0, 2.0]
bounds = ([500, 0.005, 10, 0.2], [100000, 1.0, 200, 10.0])
K_samples = []
for _ in range(n_bootstrap):
idx = np.random.choice(n, n, replace=True)
t_boot = t[idx]
y_boot = launches[idx]
try:
popt, _ = curve_fit(richards, t_boot, y_boot, p0=p0, bounds=bounds, maxfev=20000)
K_samples.append(popt[0])
except:
pass
if len(K_samples) > 100:
return {
"mean": np.mean(K_samples),
"median": np.median(K_samples),
"std": np.std(K_samples),
"ci_5": np.percentile(K_samples, 5),
"ci_25": np.percentile(K_samples, 25),
"ci_75": np.percentile(K_samples, 75),
"ci_95": np.percentile(K_samples, 95),
"n_valid": len(K_samples),
"samples": K_samples,
}
return None
# ============================================================
# 4. Physical Constraint Analysis (Independent Derivation)
# ============================================================
def derive_physical_limits():
"""
Derive launch capacity limits from physical/operational constraints.
This is INDEPENDENT of the statistical model - based purely on
engineering and operational considerations.
"""
# Number of major launch sites (problem specifies 10)
n_sites = 10
# Per-site capacity analysis
constraints = {
"pad_turnaround": {
"description": "Launch pad turnaround time",
"min_hours": 24, # Minimum for rapid-reuse (SpaceX target)
"typical_hours": 48, # Current state-of-art
"conservative_hours": 168, # Weekly launches
},
"range_safety": {
"description": "Range safety clearance",
"note": "Adjacent pads cannot launch simultaneously",
},
"weather_windows": {
"description": "Weather constraints",
"usable_days_fraction": 0.7, # ~70% of days have acceptable weather
},
"trajectory_conflicts": {
"description": "Orbital slot and trajectory coordination",
"note": "Multiple launches per day require careful scheduling",
},
}
# Derive per-site daily capacity
scenarios = {
"Conservative (current tech)": {
"launches_per_site_per_day": 1/7, # Weekly launches
"justification": "Current average for most sites",
},
"Moderate (near-term)": {
"launches_per_site_per_day": 1/3, # Every 3 days
"justification": "Achievable with infrastructure investment",
},
"Optimistic (SpaceX-level)": {
"launches_per_site_per_day": 1/1.5, # Every 36 hours
"justification": "Demonstrated by SpaceX at peak",
},
"Theoretical Maximum": {
"launches_per_site_per_day": 1.0, # Daily launches
"justification": "Physical limit with 24h turnaround",
},
}
# Calculate annual capacities
results = {}
for scenario_name, params in scenarios.items():
daily_rate = params["launches_per_site_per_day"]
annual_per_site = daily_rate * 365
total_annual = annual_per_site * n_sites
results[scenario_name] = {
"daily_per_site": daily_rate,
"annual_per_site": annual_per_site,
"total_annual": total_annual,
"justification": params["justification"],
}
return {
"n_sites": n_sites,
"constraints": constraints,
"scenarios": results,
"theoretical_max": n_sites * 365, # 3650
}
# ============================================================
# 5. Convergence Analysis
# ============================================================
def analyze_convergence(data_driven_K, bootstrap_results, physical_limits):
"""
Compare data-driven K estimate with physics-based limits.
If they converge, the conclusion is well-supported.
"""
physical_max = physical_limits["theoretical_max"] # 3650
# Data-driven estimate
K_data = data_driven_K
K_ci_low = bootstrap_results["ci_5"] if bootstrap_results else K_data * 0.8
K_ci_high = bootstrap_results["ci_95"] if bootstrap_results else K_data * 1.2
# Convergence analysis
analysis = {
"data_driven_K": K_data,
"data_driven_CI": (K_ci_low, K_ci_high),
"physical_max": physical_max,
"ratio": K_data / physical_max,
"converged": K_ci_low <= physical_max <= K_ci_high * 1.5, # Within range
}
# Interpretation
if K_data < physical_max * 0.5:
analysis["interpretation"] = "Data suggests saturation well below physical limit"
analysis["recommendation"] = "Use data-driven K as conservative estimate"
elif K_data < physical_max * 1.2:
analysis["interpretation"] = "Data-driven K converges with physical maximum"
analysis["recommendation"] = "Physical limit (1/site/day) is well-supported"
else:
analysis["interpretation"] = "Data suggests potential beyond current sites"
analysis["recommendation"] = "May need to consider additional sites or technology"
return analysis
# ============================================================
# 6. Visualization
# ============================================================
def plot_comprehensive_analysis(results_dict, physical_limits, convergence,
save_path="launch_capacity_analysis.png"):
"""Generate comprehensive analysis plot with dual validation"""
fig, axes = plt.subplots(2, 2, figsize=(15, 12))
colors = {
"data": "#34495E",
"model": "#27AE60",
"physical": "#E74C3C",
"ci": "#3498DB",
}
# ========== Plot 1: Model Fit (Unconstrained) ==========
ax1 = axes[0, 0]
res = results_dict["近15年"]
if res["success"]:
years = res["years"]
launches = res["launches"]
base_year = res["base_year"]
# Historical data
ax1.scatter(years, launches, color=colors["data"], s=60, alpha=0.8,
label="Historical Data (2010-2025)", zorder=3)
# Model prediction
years_proj = np.arange(2010, 2101)
t_proj = years_proj - base_year
pred = richards(t_proj, *res["params"])
ax1.plot(years_proj, pred, color=colors["model"], lw=2.5,
label=f'Richards Model (K={res["K"]:.0f}, R²={res["r_squared"]:.3f})')
# Physical limit
phys_max = physical_limits["theoretical_max"]
ax1.axhline(phys_max, color=colors["physical"], ls='--', lw=2,
label=f'Physical Limit: {phys_max} (1/site/day)')
# Bootstrap CI band
boot = results_dict.get("bootstrap")
if boot:
# Show where K falls
ax1.axhline(boot["ci_5"], color=colors["ci"], ls=':', lw=1.5, alpha=0.7)
ax1.axhline(boot["ci_95"], color=colors["ci"], ls=':', lw=1.5, alpha=0.7,
label=f'90% CI: [{boot["ci_5"]:.0f}, {boot["ci_95"]:.0f}]')
ax1.axvline(2025, color="gray", ls=":", lw=1, alpha=0.7)
ax1.axvline(2050, color="blue", ls=":", lw=1.5, alpha=0.7)
ax1.text(2052, 500, "2050", fontsize=10, color="blue")
ax1.set_xlabel("Year", fontsize=11)
ax1.set_ylabel("Annual Launches", fontsize=11)
ax1.set_title("Step 1: Unconstrained Richards Model Fit\n(K determined by data, not preset)", fontsize=12)
ax1.legend(loc="upper left", fontsize=9)
ax1.grid(True, alpha=0.3)
ax1.set_xlim(2008, 2100)
ax1.set_ylim(0, max(res["K"] * 1.1, phys_max * 1.2))
# ========== Plot 2: Bootstrap K Distribution ==========
ax2 = axes[0, 1]
boot = results_dict.get("bootstrap")
if boot and "samples" in boot:
samples = boot["samples"]
# Histogram
ax2.hist(samples, bins=40, color=colors["ci"], alpha=0.7, edgecolor='white',
label=f'Bootstrap samples (n={boot["n_valid"]})')
# Mark statistics
ax2.axvline(boot["mean"], color=colors["model"], lw=2, ls='-',
label=f'Mean K = {boot["mean"]:.0f}')
ax2.axvline(boot["ci_5"], color='orange', lw=2, ls='--',
label=f'5th percentile = {boot["ci_5"]:.0f}')
ax2.axvline(boot["ci_95"], color='orange', lw=2, ls='--',
label=f'95th percentile = {boot["ci_95"]:.0f}')
# Physical limit
ax2.axvline(physical_limits["theoretical_max"], color=colors["physical"],
lw=2.5, ls='-', label=f'Physical max = {physical_limits["theoretical_max"]}')
ax2.set_xlabel("Carrying Capacity K (launches/year)", fontsize=11)
ax2.set_ylabel("Frequency", fontsize=11)
ax2.set_title("Step 2: Bootstrap Distribution of K\n(Uncertainty quantification)", fontsize=12)
ax2.legend(loc="upper right", fontsize=9)
ax2.grid(True, alpha=0.3)
# ========== Plot 3: Physical Constraint Analysis ==========
ax3 = axes[1, 0]
scenarios = physical_limits["scenarios"]
names = list(scenarios.keys())
values = [scenarios[n]["total_annual"] for n in names]
y_pos = np.arange(len(names))
bars = ax3.barh(y_pos, values, color=colors["physical"], alpha=0.7, edgecolor='black')
# Data-driven K
data_K = results_dict["近15年"]["K"]
ax3.axvline(data_K, color=colors["model"], lw=3, ls='-',
label=f'Data-driven K = {data_K:.0f}')
# Mark values
for bar, val in zip(bars, values):
ax3.text(val + 100, bar.get_y() + bar.get_height()/2,
f'{val:.0f}', va='center', fontsize=10, fontweight='bold')
ax3.set_yticks(y_pos)
ax3.set_yticklabels(names, fontsize=9)
ax3.set_xlabel("Annual Launch Capacity (10 sites)", fontsize=11)
ax3.set_title("Step 3: Physics-Based Capacity Analysis\n(Independent of statistical model)", fontsize=12)
ax3.legend(loc="lower right", fontsize=10)
ax3.grid(True, alpha=0.3, axis='x')
ax3.set_xlim(0, max(values) * 1.3)
# ========== Plot 4: Convergence Summary ==========
ax4 = axes[1, 1]
ax4.axis('off')
# Summary text
boot_text = f"{boot['mean']:.0f} ± {boot['std']:.0f}" if boot else "N/A"
ci_text = f"[{boot['ci_5']:.0f}, {boot['ci_95']:.0f}]" if boot else "N/A"
summary = f"""
┌──────────────────────────────────────────────────────────────────────┐
│ DUAL VALIDATION SUMMARY │
├──────────────────────────────────────────────────────────────────────┤
│ │
│ STEP 1: DATA-DRIVEN ANALYSIS (Richards Model) │
│ ───────────────────────────────────────────── │
│ • Model: N(t) = K / (1 + exp(-r(t-t₀)))^(1/v) │
│ • Fitting method: Nonlinear least squares (unconstrained) │
│ • Result: K = {results_dict['近15年']['K']:.0f} launches/year │
│ • Goodness of fit: R² = {results_dict['近15年']['r_squared']:.4f}
│ • Bootstrap 90% CI: {ci_text:<25}
│ │
│ STEP 2: PHYSICS-BASED ANALYSIS │
│ ────────────────────────────── │
│ • Number of major launch sites: {physical_limits['n_sites']}
│ • Theoretical maximum (1/site/day): {physical_limits['theoretical_max']} launches/year │
│ • Key constraints: pad turnaround, range safety, weather │
│ │
│ STEP 3: CONVERGENCE CHECK │
│ ───────────────────────── │
│ • Data-driven K / Physical max = {convergence['ratio']:.2f}
│ • Interpretation: {convergence['interpretation']:<30}
│ │
│ CONCLUSION │
│ ────────── │
│ The unconstrained data-driven model yields K ≈ {results_dict['近15年']['K']:.0f}, │
│ which {"converges with" if convergence['converged'] else "differs from"} the physics-based limit of {physical_limits['theoretical_max']}. │
│ │
│ This {"SUPPORTS" if convergence['converged'] else "DOES NOT SUPPORT"} the assumption of 1 launch/site/day as the │
│ upper bound for rocket launch frequency. │
│ │
└──────────────────────────────────────────────────────────────────────┘
"""
ax4.text(0.02, 0.98, summary, transform=ax4.transAxes,
fontsize=10, verticalalignment='top', family='monospace',
bbox=dict(boxstyle='round', facecolor='lightyellow', alpha=0.9))
plt.suptitle("Launch Capacity Analysis: Data-Driven + Physics-Based Dual Validation",
fontsize=14, fontweight='bold', y=1.02)
plt.tight_layout()
plt.savefig(save_path, dpi=150, bbox_inches='tight')
plt.close()
print(f"Plot saved: {save_path}")
# ============================================================
# 7. Main Analysis
# ============================================================
def main():
print("=" * 70)
print("Launch Capacity Analysis - Dual Validation Methodology")
print("=" * 70)
print("""
Methodology:
1. DATA-DRIVEN: Fit Richards model WITHOUT constraining K
2. PHYSICS-BASED: Derive limits from site capacity analysis
3. CONVERGENCE: Compare both approaches for validation
""")
# Load data
df = load_data()
print(f"Data range: {df['year'].min()} - {df['year'].max()}")
print(f"Data points: {len(df)}")
print(f"\nRecent launch counts:")
for _, row in df.tail(5).iterrows():
print(f" {int(row['year'])}: {int(row['launches'])} launches")
# Define data windows
windows = {
"全部数据": df[df["year"] <= 2025].copy(),
"近25年": df[(df["year"] >= 2000) & (df["year"] <= 2025)].copy(),
"近15年": df[(df["year"] >= 2010) & (df["year"] <= 2025)].copy(),
"近10年": df[(df["year"] >= 2015) & (df["year"] <= 2025)].copy(),
}
results_dict = {}
# ========== STEP 1: Data-Driven Analysis ==========
print("\n" + "=" * 70)
print("STEP 1: DATA-DRIVEN ANALYSIS (Unconstrained Richards Model)")
print("=" * 70)
for window_name, data in windows.items():
print(f"\n--- {window_name} ({data['year'].min()}-{data['year'].max()}, n={len(data)}) ---")
result = fit_richards_unconstrained(data)
results_dict[window_name] = result
if result["success"]:
print(f" K (carrying capacity) = {result['K']:.0f} ± {result['K_err']:.0f}")
print(f" r (growth rate) = {result['r']:.4f}")
print(f" t0 (inflection) = {result['inflection_year']:.1f}")
print(f" v (shape) = {result['v']:.3f}")
print(f" R² = {result['r_squared']:.4f}")
print(f" AIC = {result['aic']:.1f}")
# Bootstrap for recent 15-year data
print("\n--- Bootstrap Uncertainty (近15年, n=1000) ---")
boot_result = bootstrap_K_estimate(windows["近15年"], n_bootstrap=1000)
results_dict["bootstrap"] = boot_result
if boot_result:
print(f" K mean = {boot_result['mean']:.0f}")
print(f" K median = {boot_result['median']:.0f}")
print(f" K std = {boot_result['std']:.0f}")
print(f" 90% CI = [{boot_result['ci_5']:.0f}, {boot_result['ci_95']:.0f}]")
# ========== STEP 2: Physics-Based Analysis ==========
print("\n" + "=" * 70)
print("STEP 2: PHYSICS-BASED ANALYSIS")
print("=" * 70)
physical_limits = derive_physical_limits()
print(f"\nNumber of major launch sites: {physical_limits['n_sites']}")
print(f"\nPer-site capacity scenarios:")
for name, params in physical_limits["scenarios"].items():
print(f" {name}:")
print(f" {params['daily_per_site']:.2f} launches/site/day")
print(f"{params['total_annual']:.0f} launches/year (total)")
print(f"\nTheoretical maximum (1/site/day): {physical_limits['theoretical_max']} launches/year")
# ========== STEP 3: Convergence Analysis ==========
print("\n" + "=" * 70)
print("STEP 3: CONVERGENCE ANALYSIS")
print("=" * 70)
data_K = results_dict["近15年"]["K"]
convergence = analyze_convergence(data_K, boot_result, physical_limits)
print(f"\n Data-driven K: {convergence['data_driven_K']:.0f}")
print(f" Physical maximum: {convergence['physical_max']}")
print(f" Ratio: {convergence['ratio']:.2f}")
print(f"\n Interpretation: {convergence['interpretation']}")
print(f" Recommendation: {convergence['recommendation']}")
# ========== Generate Visualization ==========
print("\n" + "=" * 70)
print("Generating visualization...")
print("=" * 70)
plot_comprehensive_analysis(results_dict, physical_limits, convergence)
# ========== Final Conclusion ==========
print("\n" + "=" * 70)
print("CONCLUSION")
print("=" * 70)
print(f"""
1. DATA-DRIVEN RESULT:
- Unconstrained Richards model yields K = {data_K:.0f} launches/year
- Bootstrap 90% CI: [{boot_result['ci_5']:.0f}, {boot_result['ci_95']:.0f}]
2. PHYSICS-BASED RESULT:
- 10 sites × 365 days = {physical_limits['theoretical_max']} launches/year (theoretical max)
- Based on: pad turnaround, range safety, weather constraints
3. CONVERGENCE:
- Data/Physics ratio = {convergence['ratio']:.2f}
- {convergence['interpretation']}
4. RECOMMENDATION:
- {convergence['recommendation']}
- For model: Adopt 1 launch/site/day = 365 launches/site/year as upper bound
""")
print("=" * 70)
print("Analysis Complete!")
print("=" * 70)
return results_dict, physical_limits, convergence
if __name__ == "__main__":
results, physical, convergence = main()