plane: fig

2026-02-02 21:47:52 +08:00
parent 8192fcc354
commit 244e157bc0
11 changed files with 3245 additions and 3108 deletions
--- a/p1/find_optimal_window.py
+++ b/p1/find_optimal_window.py
@@ -1,230 +1,230 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Find Optimal Data Window for K ≈ 3650
-
-Enumerate all possible starting years to find which data window
-produces K closest to the physical limit (3650 launches/year).
-"""
-
-import pandas as pd
-import numpy as np
-from scipy.optimize import curve_fit
-import matplotlib
-matplotlib.use('Agg')
-import matplotlib.pyplot as plt
-import warnings
-warnings.filterwarnings('ignore')
-
-plt.rcParams['font.sans-serif'] = ['Arial Unicode MS', 'SimHei', 'DejaVu Sans']
-plt.rcParams['axes.unicode_minus'] = False
-
-
-# Richards model
-def richards(t, K, r, t0, v):
-    exp_term = np.exp(-r * (t - t0))
-    exp_term = np.clip(exp_term, 1e-10, 1e10)
-    return K / np.power(1 + exp_term, 1/v)
-
-
-def load_data(filepath="rocket_launch_counts.csv"):
-    df = pd.read_csv(filepath)
-    df = df.rename(columns={"YDate": "year", "Total": "launches"})
-    df["year"] = pd.to_numeric(df["year"], errors="coerce")
-    df["launches"] = pd.to_numeric(df["launches"], errors="coerce")
-    df = df.dropna(subset=["year", "launches"])
-    df = df[(df["year"] >= 1957) & (df["year"] <= 2025)]
-    df = df.astype({"year": int, "launches": int})
-    df = df.sort_values("year").reset_index(drop=True)
-    return df
-
-
-def fit_richards(data, base_year=1957):
-    """Fit unconstrained Richards model"""
-    years = data["year"].values
-    launches = data["launches"].values
-    t = (years - base_year).astype(float)
-    
-    p0 = [5000.0, 0.08, 80.0, 2.0]
-    bounds = ([500, 0.005, 10, 0.2], [100000, 1.0, 200, 10.0])
-    
-    try:
-        popt, pcov = curve_fit(richards, t, launches, p0=p0, bounds=bounds, maxfev=100000)
-        y_pred = richards(t, *popt)
-        ss_res = np.sum((launches - y_pred) ** 2)
-        ss_tot = np.sum((launches - np.mean(launches)) ** 2)
-        r_squared = 1 - (ss_res / ss_tot) if ss_tot > 0 else 0
-        
-        return {
-            "success": True,
-            "K": popt[0],
-            "r": popt[1],
-            "t0": popt[2],
-            "v": popt[3],
-            "r_squared": r_squared,
-            "n_points": len(data),
-        }
-    except:
-        return {"success": False}
-
-
-def main():
-    print("=" * 80)
-    print("枚举起始年份，寻找 K ≈ 3650 的数据窗口")
-    print("=" * 80)
-    
-    df = load_data()
-    print(f"数据范围: {df['year'].min()} - {df['year'].max()}\n")
-    
-    target_K = 3650
-    end_year = 2025
-    
-    # Enumerate all starting years
-    results = []
-    
-    for start_year in range(1957, 2020):
-        data = df[(df["year"] >= start_year) & (df["year"] <= end_year)].copy()
-        
-        if len(data) < 5:  # Need at least 5 data points
-            continue
-        
-        result = fit_richards(data)
-        
-        if result["success"]:
-            diff = abs(result["K"] - target_K)
-            results.append({
-                "start_year": start_year,
-                "end_year": end_year,
-                "years_span": end_year - start_year + 1,
-                "n_points": result["n_points"],
-                "K": result["K"],
-                "r_squared": result["r_squared"],
-                "diff_from_target": diff,
-                "ratio": result["K"] / target_K,
-            })
-    
-    # Sort by difference from target
-    results_df = pd.DataFrame(results)
-    results_df = results_df.sort_values("diff_from_target")
-    
-    # Print full table
-    print("完整枚举结果表（按与目标值 3650 的差距排序）:")
-    print("-" * 80)
-    print(f"{'起始年份':>8} | {'时间跨度':>8} | {'数据点':>6} | {'K值':>10} | {'R²':>8} | {'K/3650':>8} | {'差距':>10}")
-    print("-" * 80)
-    
-    for _, row in results_df.iterrows():
-        marker = "★" if row["diff_from_target"] < 500 else ""
-        print(f"{int(row['start_year']):>8} | {int(row['years_span']):>6}年 | {int(row['n_points']):>6} | "
-              f"{row['K']:>10.0f} | {row['r_squared']:>8.4f} | {row['ratio']:>8.2f}x | {row['diff_from_target']:>10.0f} {marker}")
-    
-    # Find closest matches
-    print("\n" + "=" * 80)
-    print("最接近 K = 3650 的数据窗口 (Top 10)")
-    print("=" * 80)
-    
-    top10 = results_df.head(10)
-    print(f"\n{'排名':>4} | {'起始年份':>8} | {'K值':>10} | {'R²':>8} | {'差距':>10} | {'评价':<20}")
-    print("-" * 75)
-    
-    for i, (_, row) in enumerate(top10.iterrows(), 1):
-        if row["r_squared"] < 0.5:
-            comment = "❌ 拟合差，不可信"
-        elif row["r_squared"] < 0.8:
-            comment = "⚠️ 拟合一般"
-        else:
-            comment = "✅ 拟合良好"
-        
-        print(f"{i:>4} | {int(row['start_year']):>8} | {row['K']:>10.0f} | {row['r_squared']:>8.4f} | "
-              f"{row['diff_from_target']:>10.0f} | {comment:<20}")
-    
-    # Analysis
-    print("\n" + "=" * 80)
-    print("分析结论")
-    print("=" * 80)
-    
-    # Find best with good R²
-    good_fit = results_df[results_df["r_squared"] >= 0.8]
-    if len(good_fit) > 0:
-        best_good = good_fit.iloc[0]
-        print(f"\n在 R² ≥ 0.8 的条件下，最接近 K=3650 的窗口:")
-        print(f"  起始年份: {int(best_good['start_year'])}")
-        print(f"  K = {best_good['K']:.0f}")
-        print(f"  R² = {best_good['r_squared']:.4f}")
-        print(f"  差距: {best_good['diff_from_target']:.0f}")
-    
-    # Summary
-    print("\n关键发现:")
-    print("-" * 40)
-    
-    # Check if any good fit gives K near 3650
-    near_target = results_df[(results_df["diff_from_target"] < 1000) & (results_df["r_squared"] >= 0.7)]
-    
-    if len(near_target) == 0:
-        print("  ⚠️ 没有任何数据窗口能在良好拟合(R²≥0.7)的条件下得到 K≈3650")
-        print("  ⚠️ 所有良好拟合的窗口都给出 K >> 3650 或 K << 3650")
-        print("\n  这说明:")
-        print("  • K=3650 不是数据自然支持的结论")
-        print("  • K=3650 来自物理约束，而非统计预测")
-        print("  • 论文中应该明确说明这是'物理上限'而非'数据预测'")
-    else:
-        print(f"  找到 {len(near_target)} 个窗口使 K 接近 3650:")
-        for _, row in near_target.iterrows():
-            print(f"    {int(row['start_year'])}-2025: K={row['K']:.0f}, R²={row['r_squared']:.4f}")
-    
-    # Generate visualization
-    print("\n" + "=" * 80)
-    print("生成可视化...")
-    print("=" * 80)
-    
-    fig, axes = plt.subplots(1, 2, figsize=(14, 6))
-    
-    # Plot 1: K vs Start Year
-    ax1 = axes[0]
-    colors = ['#27AE60' if r2 >= 0.8 else '#F39C12' if r2 >= 0.5 else '#E74C3C' 
-              for r2 in results_df["r_squared"]]
-    
-    ax1.scatter(results_df["start_year"], results_df["K"], c=colors, s=60, alpha=0.7, edgecolor='black')
-    ax1.axhline(3650, color='blue', ls='--', lw=2, label='Target K=3650')
-    ax1.axhline(3650*0.9, color='blue', ls=':', lw=1, alpha=0.5)
-    ax1.axhline(3650*1.1, color='blue', ls=':', lw=1, alpha=0.5)
-    
-    ax1.set_xlabel("Starting Year", fontsize=11)
-    ax1.set_ylabel("K (Carrying Capacity)", fontsize=11)
-    ax1.set_title("K vs Starting Year\n(Color: Green=R²≥0.8, Yellow=R²≥0.5, Red=R²<0.5)", fontsize=12)
-    ax1.legend()
-    ax1.grid(True, alpha=0.3)
-    ax1.set_xlim(1955, 2020)
-    
-    # Plot 2: R² vs Start Year
-    ax2 = axes[1]
-    ax2.scatter(results_df["start_year"], results_df["r_squared"], c=colors, s=60, alpha=0.7, edgecolor='black')
-    ax2.axhline(0.8, color='green', ls='--', lw=1.5, label='R²=0.8 (Good)')
-    ax2.axhline(0.5, color='orange', ls=':', lw=1.5, label='R²=0.5 (Acceptable)')
-    
-    ax2.set_xlabel("Starting Year", fontsize=11)
-    ax2.set_ylabel("R² (Goodness of Fit)", fontsize=11)
-    ax2.set_title("Model Fit Quality vs Starting Year", fontsize=12)
-    ax2.legend()
-    ax2.grid(True, alpha=0.3)
-    ax2.set_xlim(1955, 2020)
-    ax2.set_ylim(-0.1, 1.1)
-    
-    plt.tight_layout()
-    plt.savefig("find_optimal_window.png", dpi=150, bbox_inches='tight')
-    plt.close()
-    print("图表已保存: find_optimal_window.png")
-    
-    # Save to CSV
-    results_df.to_csv("window_enumeration.csv", index=False)
-    print("数据已保存: window_enumeration.csv")
-    
-    print("\n" + "=" * 80)
-    print("分析完成!")
-    print("=" * 80)
-    
-    return results_df
-
-
-if __name__ == "__main__":
-    results = main()
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Find Optimal Data Window for K ≈ 3650
+
+Enumerate all possible starting years to find which data window
+produces K closest to the physical limit (3650 launches/year).
+"""
+
+import pandas as pd
+import numpy as np
+from scipy.optimize import curve_fit
+import matplotlib
+matplotlib.use('Agg')
+import matplotlib.pyplot as plt
+import warnings
+warnings.filterwarnings('ignore')
+
+plt.rcParams['font.sans-serif'] = ['Arial Unicode MS', 'SimHei', 'DejaVu Sans']
+plt.rcParams['axes.unicode_minus'] = False
+
+
+# Richards model
+def richards(t, K, r, t0, v):
+    exp_term = np.exp(-r * (t - t0))
+    exp_term = np.clip(exp_term, 1e-10, 1e10)
+    return K / np.power(1 + exp_term, 1/v)
+
+
+def load_data(filepath="rocket_launch_counts.csv"):
+    df = pd.read_csv(filepath)
+    df = df.rename(columns={"YDate": "year", "Total": "launches"})
+    df["year"] = pd.to_numeric(df["year"], errors="coerce")
+    df["launches"] = pd.to_numeric(df["launches"], errors="coerce")
+    df = df.dropna(subset=["year", "launches"])
+    df = df[(df["year"] >= 1957) & (df["year"] <= 2025)]
+    df = df.astype({"year": int, "launches": int})
+    df = df.sort_values("year").reset_index(drop=True)
+    return df
+
+
+def fit_richards(data, base_year=1957):
+    """Fit unconstrained Richards model"""
+    years = data["year"].values
+    launches = data["launches"].values
+    t = (years - base_year).astype(float)
+    
+    p0 = [5000.0, 0.08, 80.0, 2.0]
+    bounds = ([500, 0.005, 10, 0.2], [100000, 1.0, 200, 10.0])
+    
+    try:
+        popt, pcov = curve_fit(richards, t, launches, p0=p0, bounds=bounds, maxfev=100000)
+        y_pred = richards(t, *popt)
+        ss_res = np.sum((launches - y_pred) ** 2)
+        ss_tot = np.sum((launches - np.mean(launches)) ** 2)
+        r_squared = 1 - (ss_res / ss_tot) if ss_tot > 0 else 0
+        
+        return {
+            "success": True,
+            "K": popt[0],
+            "r": popt[1],
+            "t0": popt[2],
+            "v": popt[3],
+            "r_squared": r_squared,
+            "n_points": len(data),
+        }
+    except:
+        return {"success": False}
+
+
+def main():
+    print("=" * 80)
+    print("枚举起始年份，寻找 K ≈ 3650 的数据窗口")
+    print("=" * 80)
+    
+    df = load_data()
+    print(f"数据范围: {df['year'].min()} - {df['year'].max()}\n")
+    
+    target_K = 3650
+    end_year = 2025
+    
+    # Enumerate all starting years
+    results = []
+    
+    for start_year in range(1957, 2020):
+        data = df[(df["year"] >= start_year) & (df["year"] <= end_year)].copy()
+        
+        if len(data) < 5:  # Need at least 5 data points
+            continue
+        
+        result = fit_richards(data)
+        
+        if result["success"]:
+            diff = abs(result["K"] - target_K)
+            results.append({
+                "start_year": start_year,
+                "end_year": end_year,
+                "years_span": end_year - start_year + 1,
+                "n_points": result["n_points"],
+                "K": result["K"],
+                "r_squared": result["r_squared"],
+                "diff_from_target": diff,
+                "ratio": result["K"] / target_K,
+            })
+    
+    # Sort by difference from target
+    results_df = pd.DataFrame(results)
+    results_df = results_df.sort_values("diff_from_target")
+    
+    # Print full table
+    print("完整枚举结果表（按与目标值 3650 的差距排序）:")
+    print("-" * 80)
+    print(f"{'起始年份':>8} | {'时间跨度':>8} | {'数据点':>6} | {'K值':>10} | {'R²':>8} | {'K/3650':>8} | {'差距':>10}")
+    print("-" * 80)
+    
+    for _, row in results_df.iterrows():
+        marker = "★" if row["diff_from_target"] < 500 else ""
+        print(f"{int(row['start_year']):>8} | {int(row['years_span']):>6}年 | {int(row['n_points']):>6} | "
+              f"{row['K']:>10.0f} | {row['r_squared']:>8.4f} | {row['ratio']:>8.2f}x | {row['diff_from_target']:>10.0f} {marker}")
+    
+    # Find closest matches
+    print("\n" + "=" * 80)
+    print("最接近 K = 3650 的数据窗口 (Top 10)")
+    print("=" * 80)
+    
+    top10 = results_df.head(10)
+    print(f"\n{'排名':>4} | {'起始年份':>8} | {'K值':>10} | {'R²':>8} | {'差距':>10} | {'评价':<20}")
+    print("-" * 75)
+    
+    for i, (_, row) in enumerate(top10.iterrows(), 1):
+        if row["r_squared"] < 0.5:
+            comment = "❌ 拟合差，不可信"
+        elif row["r_squared"] < 0.8:
+            comment = "⚠️ 拟合一般"
+        else:
+            comment = "✅ 拟合良好"
+        
+        print(f"{i:>4} | {int(row['start_year']):>8} | {row['K']:>10.0f} | {row['r_squared']:>8.4f} | "
+              f"{row['diff_from_target']:>10.0f} | {comment:<20}")
+    
+    # Analysis
+    print("\n" + "=" * 80)
+    print("分析结论")
+    print("=" * 80)
+    
+    # Find best with good R²
+    good_fit = results_df[results_df["r_squared"] >= 0.8]
+    if len(good_fit) > 0:
+        best_good = good_fit.iloc[0]
+        print(f"\n在 R² ≥ 0.8 的条件下，最接近 K=3650 的窗口:")
+        print(f"  起始年份: {int(best_good['start_year'])}")
+        print(f"  K = {best_good['K']:.0f}")
+        print(f"  R² = {best_good['r_squared']:.4f}")
+        print(f"  差距: {best_good['diff_from_target']:.0f}")
+    
+    # Summary
+    print("\n关键发现:")
+    print("-" * 40)
+    
+    # Check if any good fit gives K near 3650
+    near_target = results_df[(results_df["diff_from_target"] < 1000) & (results_df["r_squared"] >= 0.7)]
+    
+    if len(near_target) == 0:
+        print("  ⚠️ 没有任何数据窗口能在良好拟合(R²≥0.7)的条件下得到 K≈3650")
+        print("  ⚠️ 所有良好拟合的窗口都给出 K >> 3650 或 K << 3650")
+        print("\n  这说明:")
+        print("  • K=3650 不是数据自然支持的结论")
+        print("  • K=3650 来自物理约束，而非统计预测")
+        print("  • 论文中应该明确说明这是'物理上限'而非'数据预测'")
+    else:
+        print(f"  找到 {len(near_target)} 个窗口使 K 接近 3650:")
+        for _, row in near_target.iterrows():
+            print(f"    {int(row['start_year'])}-2025: K={row['K']:.0f}, R²={row['r_squared']:.4f}")
+    
+    # Generate visualization
+    print("\n" + "=" * 80)
+    print("生成可视化...")
+    print("=" * 80)
+    
+    fig, axes = plt.subplots(1, 2, figsize=(14, 6))
+    
+    # Plot 1: K vs Start Year
+    ax1 = axes[0]
+    colors = ['#27AE60' if r2 >= 0.8 else '#F39C12' if r2 >= 0.5 else '#E74C3C' 
+              for r2 in results_df["r_squared"]]
+    
+    ax1.scatter(results_df["start_year"], results_df["K"], c=colors, s=60, alpha=0.7, edgecolor='black')
+    ax1.axhline(3650, color='blue', ls='--', lw=2, label='Target K=3650')
+    ax1.axhline(3650*0.9, color='blue', ls=':', lw=1, alpha=0.5)
+    ax1.axhline(3650*1.1, color='blue', ls=':', lw=1, alpha=0.5)
+    
+    ax1.set_xlabel("Starting Year", fontsize=11)
+    ax1.set_ylabel("K (Carrying Capacity)", fontsize=11)
+    ax1.set_title("K vs Starting Year\n(Color: Green=R²≥0.8, Yellow=R²≥0.5, Red=R²<0.5)", fontsize=12)
+    ax1.legend()
+    ax1.grid(True, alpha=0.3)
+    ax1.set_xlim(1955, 2020)
+    
+    # Plot 2: R² vs Start Year
+    ax2 = axes[1]
+    ax2.scatter(results_df["start_year"], results_df["r_squared"], c=colors, s=60, alpha=0.7, edgecolor='black')
+    ax2.axhline(0.8, color='green', ls='--', lw=1.5, label='R²=0.8 (Good)')
+    ax2.axhline(0.5, color='orange', ls=':', lw=1.5, label='R²=0.5 (Acceptable)')
+    
+    ax2.set_xlabel("Starting Year", fontsize=11)
+    ax2.set_ylabel("R² (Goodness of Fit)", fontsize=11)
+    ax2.set_title("Model Fit Quality vs Starting Year", fontsize=12)
+    ax2.legend()
+    ax2.grid(True, alpha=0.3)
+    ax2.set_xlim(1955, 2020)
+    ax2.set_ylim(-0.1, 1.1)
+    
+    plt.tight_layout()
+    plt.savefig("find_optimal_window.png", dpi=150, bbox_inches='tight')
+    plt.close()
+    print("图表已保存: find_optimal_window.png")
+    
+    # Save to CSV
+    results_df.to_csv("window_enumeration.csv", index=False)
+    print("数据已保存: window_enumeration.csv")
+    
+    print("\n" + "=" * 80)
+    print("分析完成!")
+    print("=" * 80)
+    
+    return results_df
+
+
+if __name__ == "__main__":
+    results = main()