#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Find Optimal Data Window for K ≈ 3650

Enumerate all possible starting years to find which data window produces
K closest to the physical limit (3650 launches/year).
"""

import pandas as pd
import numpy as np
from scipy.optimize import curve_fit

try:
    import matplotlib
    matplotlib.use('Agg')  # backend is selected before pyplot is imported
    import matplotlib.pyplot as plt
except ImportError:
    # Plotting is optional: the fitting helpers only need numpy/scipy/pandas.
    # main() skips the figure when pyplot is unavailable.
    plt = None

import warnings
warnings.filterwarnings('ignore')

if plt is not None:
    plt.rcParams['font.sans-serif'] = ['Arial Unicode MS', 'SimHei', 'DejaVu Sans']
    plt.rcParams['axes.unicode_minus'] = False


# Richards model
def richards(t, K, r, t0, v):
    """Evaluate the Richards curve ``K / (1 + exp(-r * (t - t0))) ** (1 / v)``.

    The exponential term is clipped to ``[1e-10, 1e10]`` before it is used,
    so extreme arguments cannot push ``inf`` or ``0`` into the power.

    Args:
        t: Array-like (or scalar) of time values.
        K: Upper asymptote of the curve.
        r: Rate coefficient inside the exponential.
        t0: Time offset inside the exponential.
        v: Shape parameter; the denominator is raised to ``1 / v``.

    Returns:
        The curve evaluated at ``t`` (same shape as ``t``).
    """
    exp_term = np.exp(-r * (t - t0))
    exp_term = np.clip(exp_term, 1e-10, 1e10)
    return K / np.power(1 + exp_term, 1 / v)


def load_data(filepath="rocket_launch_counts.csv"):
    """Load the launch-count CSV and return a cleaned ``year``/``launches`` frame.

    Columns ``YDate`` and ``Total`` are renamed to ``year`` and ``launches``.
    Rows whose values cannot be parsed as numbers are dropped, only years
    1957 through 2025 (inclusive) are kept, both columns are cast to ``int``
    and the result is sorted by year with a fresh index.

    Args:
        filepath: Path of the CSV file to read.

    Returns:
        A ``pandas.DataFrame`` containing at least ``year`` and ``launches``.
    """
    df = pd.read_csv(filepath)
    df = df.rename(columns={"YDate": "year", "Total": "launches"})
    df["year"] = pd.to_numeric(df["year"], errors="coerce")
    df["launches"] = pd.to_numeric(df["launches"], errors="coerce")
    df = df.dropna(subset=["year", "launches"])
    df = df[(df["year"] >= 1957) & (df["year"] <= 2025)]
    df = df.astype({"year": int, "launches": int})
    df = df.sort_values("year").reset_index(drop=True)
    return df


def fit_richards(data, base_year=1957):
    """Fit the Richards model to ``data`` with box bounds on the parameters.

    Args:
        data: DataFrame with ``year`` and ``launches`` columns.
        base_year: Year subtracted from ``year`` to obtain the time axis.

    Returns:
        On success, a dict with ``success=True``, the fitted ``K``, ``r``,
        ``t0``, ``v``, the coefficient of determination ``r_squared`` and
        ``n_points``. If ``curve_fit`` rejects the input or does not
        converge, ``{"success": False, "error": <message>}``.
    """
    years = data["year"].values
    launches = data["launches"].values
    t = (years - base_year).astype(float)

    p0 = [5000.0, 0.08, 80.0, 2.0]
    bounds = ([500, 0.005, 10, 0.2], [100000, 1.0, 200, 10.0])

    try:
        popt, _ = curve_fit(richards, t, launches, p0=p0, bounds=bounds,
                            maxfev=100000)
    except (RuntimeError, ValueError) as exc:
        # curve_fit signals non-convergence with RuntimeError and invalid
        # input (empty or non-finite data) with ValueError. Anything else is
        # a programming error and is allowed to propagate.
        return {"success": False, "error": str(exc)}

    y_pred = richards(t, *popt)
    ss_res = np.sum((launches - y_pred) ** 2)
    ss_tot = np.sum((launches - np.mean(launches)) ** 2)
    # A constant series has no variance to explain; report 0 instead of
    # dividing by zero.
    r_squared = 1 - (ss_res / ss_tot) if ss_tot > 0 else 0

    return {
        "success": True,
        "K": popt[0],
        "r": popt[1],
        "t0": popt[2],
        "v": popt[3],
        "r_squared": r_squared,
        "n_points": len(data),
    }


_RESULT_COLUMNS = [
    "start_year", "end_year", "years_span", "n_points",
    "K", "r_squared", "diff_from_target", "ratio",
]


def _enumerate_windows(df, target_K, end_year):
    """Fit every window ``[start_year, end_year]`` and collect one row per successful fit."""
    rows = []
    for start_year in range(1957, 2020):
        window = df[(df["year"] >= start_year) & (df["year"] <= end_year)].copy()
        if len(window) < 5:  # Need at least 5 data points
            continue

        fit = fit_richards(window)
        if not fit["success"]:
            continue

        rows.append({
            "start_year": start_year,
            "end_year": end_year,
            "years_span": end_year - start_year + 1,
            "n_points": fit["n_points"],
            "K": fit["K"],
            "r_squared": fit["r_squared"],
            "diff_from_target": abs(fit["K"] - target_K),
            "ratio": fit["K"] / target_K,
        })
    return rows


def _plot_results(results_df, target_K):
    """Draw the K and R² scatter plots and save them to find_optimal_window.png."""
    fig, axes = plt.subplots(1, 2, figsize=(14, 6))

    # Colour every point by fit quality; both panels share the colours.
    colors = ['#27AE60' if r2 >= 0.8 else '#F39C12' if r2 >= 0.5 else '#E74C3C'
              for r2 in results_df["r_squared"]]

    # Plot 1: K vs Start Year
    ax1 = axes[0]
    ax1.scatter(results_df["start_year"], results_df["K"], c=colors, s=60,
                alpha=0.7, edgecolor='black')
    ax1.axhline(target_K, color='blue', ls='--', lw=2, label='Target K=3650')
    # Dotted guides at 90% and 110% of the target.
    ax1.axhline(target_K * 0.9, color='blue', ls=':', lw=1, alpha=0.5)
    ax1.axhline(target_K * 1.1, color='blue', ls=':', lw=1, alpha=0.5)
    ax1.set_xlabel("Starting Year", fontsize=11)
    ax1.set_ylabel("K (Carrying Capacity)", fontsize=11)
    ax1.set_title("K vs Starting Year\n(Color: Green=R²≥0.8, Yellow=R²≥0.5, Red=R²<0.5)",
                  fontsize=12)
    ax1.legend()
    ax1.grid(True, alpha=0.3)
    ax1.set_xlim(1955, 2020)

    # Plot 2: R² vs Start Year
    ax2 = axes[1]
    ax2.scatter(results_df["start_year"], results_df["r_squared"], c=colors,
                s=60, alpha=0.7, edgecolor='black')
    ax2.axhline(0.8, color='green', ls='--', lw=1.5, label='R²=0.8 (Good)')
    ax2.axhline(0.5, color='orange', ls=':', lw=1.5, label='R²=0.5 (Acceptable)')
    ax2.set_xlabel("Starting Year", fontsize=11)
    ax2.set_ylabel("R² (Goodness of Fit)", fontsize=11)
    ax2.set_title("Model Fit Quality vs Starting Year", fontsize=12)
    ax2.legend()
    ax2.grid(True, alpha=0.3)
    ax2.set_xlim(1955, 2020)
    ax2.set_ylim(-0.1, 1.1)

    plt.tight_layout()
    plt.savefig("find_optimal_window.png", dpi=150, bbox_inches='tight')
    plt.close()


def main():
    """Enumerate start years, report the windows whose K is closest to 3650.

    Prints a full table and a top-10 summary, writes
    ``find_optimal_window.png`` (when matplotlib is available) and
    ``window_enumeration.csv``.

    Returns:
        DataFrame with one row per successfully fitted window, sorted by the
        absolute distance of K from the target. Empty (with the usual
        columns) when no window could be fitted.
    """
    print("=" * 80)
    print("枚举起始年份,寻找 K ≈ 3650 的数据窗口")
    print("=" * 80)

    df = load_data()
    print(f"数据范围: {df['year'].min()} - {df['year'].max()}\n")

    target_K = 3650
    end_year = 2025

    # Enumerate all starting years
    results = _enumerate_windows(df, target_K, end_year)
    if not results:
        # Without this guard the empty frame has no columns and sorting by
        # "diff_from_target" raises KeyError.
        print("⚠️ 没有任何数据窗口拟合成功,无法生成结果")
        return pd.DataFrame(columns=_RESULT_COLUMNS)

    # Sort by difference from target
    results_df = pd.DataFrame(results)
    results_df = results_df.sort_values("diff_from_target")

    # Print full table
    print("完整枚举结果表(按与目标值 3650 的差距排序):")
    print("-" * 80)
    print(f"{'起始年份':>8} | {'时间跨度':>8} | {'数据点':>6} | {'K值':>10} | {'R²':>8} | {'K/3650':>8} | {'差距':>10}")
    print("-" * 80)

    for _, row in results_df.iterrows():
        marker = "★" if row["diff_from_target"] < 500 else ""
        print(f"{int(row['start_year']):>8} | {int(row['years_span']):>6}年 | {int(row['n_points']):>6} | "
              f"{row['K']:>10.0f} | {row['r_squared']:>8.4f} | {row['ratio']:>8.2f}x | {row['diff_from_target']:>10.0f} {marker}")

    # Find closest matches
    print("\n" + "=" * 80)
    print("最接近 K = 3650 的数据窗口 (Top 10)")
    print("=" * 80)

    top10 = results_df.head(10)
    print(f"\n{'排名':>4} | {'起始年份':>8} | {'K值':>10} | {'R²':>8} | {'差距':>10} | {'评价':<20}")
    print("-" * 75)

    for i, (_, row) in enumerate(top10.iterrows(), 1):
        if row["r_squared"] < 0.5:
            comment = "❌ 拟合差,不可信"
        elif row["r_squared"] < 0.8:
            comment = "⚠️ 拟合一般"
        else:
            comment = "✅ 拟合良好"
        print(f"{i:>4} | {int(row['start_year']):>8} | {row['K']:>10.0f} | {row['r_squared']:>8.4f} | "
              f"{row['diff_from_target']:>10.0f} | {comment:<20}")

    # Analysis
    print("\n" + "=" * 80)
    print("分析结论")
    print("=" * 80)

    # Find best with good R² (frame is already sorted by distance to target)
    good_fit = results_df[results_df["r_squared"] >= 0.8]
    if not good_fit.empty:
        best_good = good_fit.iloc[0]
        print(f"\n在 R² ≥ 0.8 的条件下,最接近 K=3650 的窗口:")
        print(f" 起始年份: {int(best_good['start_year'])}")
        print(f" K = {best_good['K']:.0f}")
        print(f" R² = {best_good['r_squared']:.4f}")
        print(f" 差距: {best_good['diff_from_target']:.0f}")

    # Summary
    print("\n关键发现:")
    print("-" * 40)

    # Check if any good fit gives K near 3650
    near_target = results_df[(results_df["diff_from_target"] < 1000)
                             & (results_df["r_squared"] >= 0.7)]
    if near_target.empty:
        print(" ⚠️ 没有任何数据窗口能在良好拟合(R²≥0.7)的条件下得到 K≈3650")
        print(" ⚠️ 所有良好拟合的窗口都给出 K >> 3650 或 K << 3650")
        print("\n 这说明:")
        print(" • K=3650 不是数据自然支持的结论")
        print(" • K=3650 来自物理约束,而非统计预测")
        print(" • 论文中应该明确说明这是'物理上限'而非'数据预测'")
    else:
        print(f" 找到 {len(near_target)} 个窗口使 K 接近 3650:")
        for _, row in near_target.iterrows():
            # Use the stored end year instead of a second hard-coded 2025.
            print(f" {int(row['start_year'])}-{int(row['end_year'])}: K={row['K']:.0f}, R²={row['r_squared']:.4f}")

    # Generate visualization
    print("\n" + "=" * 80)
    print("生成可视化...")
    print("=" * 80)

    if plt is None:
        print("未安装 matplotlib,跳过图表生成")
    else:
        _plot_results(results_df, target_K)
        print("图表已保存: find_optimal_window.png")

    # Save to CSV
    results_df.to_csv("window_enumeration.csv", index=False)
    print("数据已保存: window_enumeration.csv")

    print("\n" + "=" * 80)
    print("分析完成!")
    print("=" * 80)

    return results_df


if __name__ == "__main__":
    results = main()