plane: fig
This commit is contained in:
@@ -1,230 +1,230 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Find Optimal Data Window for K ≈ 3650
|
||||
|
||||
Enumerate all possible starting years to find which data window
|
||||
produces K closest to the physical limit (3650 launches/year).
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from scipy.optimize import curve_fit
|
||||
import matplotlib
|
||||
matplotlib.use('Agg')
|
||||
import matplotlib.pyplot as plt
|
||||
import warnings
|
||||
warnings.filterwarnings('ignore')
|
||||
|
||||
plt.rcParams['font.sans-serif'] = ['Arial Unicode MS', 'SimHei', 'DejaVu Sans']
|
||||
plt.rcParams['axes.unicode_minus'] = False
|
||||
|
||||
|
||||
# Richards model
|
||||
def richards(t, K, r, t0, v):
|
||||
exp_term = np.exp(-r * (t - t0))
|
||||
exp_term = np.clip(exp_term, 1e-10, 1e10)
|
||||
return K / np.power(1 + exp_term, 1/v)
|
||||
|
||||
|
||||
def load_data(filepath="rocket_launch_counts.csv"):
|
||||
df = pd.read_csv(filepath)
|
||||
df = df.rename(columns={"YDate": "year", "Total": "launches"})
|
||||
df["year"] = pd.to_numeric(df["year"], errors="coerce")
|
||||
df["launches"] = pd.to_numeric(df["launches"], errors="coerce")
|
||||
df = df.dropna(subset=["year", "launches"])
|
||||
df = df[(df["year"] >= 1957) & (df["year"] <= 2025)]
|
||||
df = df.astype({"year": int, "launches": int})
|
||||
df = df.sort_values("year").reset_index(drop=True)
|
||||
return df
|
||||
|
||||
|
||||
def fit_richards(data, base_year=1957):
|
||||
"""Fit unconstrained Richards model"""
|
||||
years = data["year"].values
|
||||
launches = data["launches"].values
|
||||
t = (years - base_year).astype(float)
|
||||
|
||||
p0 = [5000.0, 0.08, 80.0, 2.0]
|
||||
bounds = ([500, 0.005, 10, 0.2], [100000, 1.0, 200, 10.0])
|
||||
|
||||
try:
|
||||
popt, pcov = curve_fit(richards, t, launches, p0=p0, bounds=bounds, maxfev=100000)
|
||||
y_pred = richards(t, *popt)
|
||||
ss_res = np.sum((launches - y_pred) ** 2)
|
||||
ss_tot = np.sum((launches - np.mean(launches)) ** 2)
|
||||
r_squared = 1 - (ss_res / ss_tot) if ss_tot > 0 else 0
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
"K": popt[0],
|
||||
"r": popt[1],
|
||||
"t0": popt[2],
|
||||
"v": popt[3],
|
||||
"r_squared": r_squared,
|
||||
"n_points": len(data),
|
||||
}
|
||||
except:
|
||||
return {"success": False}
|
||||
|
||||
|
||||
def main():
|
||||
print("=" * 80)
|
||||
print("枚举起始年份,寻找 K ≈ 3650 的数据窗口")
|
||||
print("=" * 80)
|
||||
|
||||
df = load_data()
|
||||
print(f"数据范围: {df['year'].min()} - {df['year'].max()}\n")
|
||||
|
||||
target_K = 3650
|
||||
end_year = 2025
|
||||
|
||||
# Enumerate all starting years
|
||||
results = []
|
||||
|
||||
for start_year in range(1957, 2020):
|
||||
data = df[(df["year"] >= start_year) & (df["year"] <= end_year)].copy()
|
||||
|
||||
if len(data) < 5: # Need at least 5 data points
|
||||
continue
|
||||
|
||||
result = fit_richards(data)
|
||||
|
||||
if result["success"]:
|
||||
diff = abs(result["K"] - target_K)
|
||||
results.append({
|
||||
"start_year": start_year,
|
||||
"end_year": end_year,
|
||||
"years_span": end_year - start_year + 1,
|
||||
"n_points": result["n_points"],
|
||||
"K": result["K"],
|
||||
"r_squared": result["r_squared"],
|
||||
"diff_from_target": diff,
|
||||
"ratio": result["K"] / target_K,
|
||||
})
|
||||
|
||||
# Sort by difference from target
|
||||
results_df = pd.DataFrame(results)
|
||||
results_df = results_df.sort_values("diff_from_target")
|
||||
|
||||
# Print full table
|
||||
print("完整枚举结果表(按与目标值 3650 的差距排序):")
|
||||
print("-" * 80)
|
||||
print(f"{'起始年份':>8} | {'时间跨度':>8} | {'数据点':>6} | {'K值':>10} | {'R²':>8} | {'K/3650':>8} | {'差距':>10}")
|
||||
print("-" * 80)
|
||||
|
||||
for _, row in results_df.iterrows():
|
||||
marker = "★" if row["diff_from_target"] < 500 else ""
|
||||
print(f"{int(row['start_year']):>8} | {int(row['years_span']):>6}年 | {int(row['n_points']):>6} | "
|
||||
f"{row['K']:>10.0f} | {row['r_squared']:>8.4f} | {row['ratio']:>8.2f}x | {row['diff_from_target']:>10.0f} {marker}")
|
||||
|
||||
# Find closest matches
|
||||
print("\n" + "=" * 80)
|
||||
print("最接近 K = 3650 的数据窗口 (Top 10)")
|
||||
print("=" * 80)
|
||||
|
||||
top10 = results_df.head(10)
|
||||
print(f"\n{'排名':>4} | {'起始年份':>8} | {'K值':>10} | {'R²':>8} | {'差距':>10} | {'评价':<20}")
|
||||
print("-" * 75)
|
||||
|
||||
for i, (_, row) in enumerate(top10.iterrows(), 1):
|
||||
if row["r_squared"] < 0.5:
|
||||
comment = "❌ 拟合差,不可信"
|
||||
elif row["r_squared"] < 0.8:
|
||||
comment = "⚠️ 拟合一般"
|
||||
else:
|
||||
comment = "✅ 拟合良好"
|
||||
|
||||
print(f"{i:>4} | {int(row['start_year']):>8} | {row['K']:>10.0f} | {row['r_squared']:>8.4f} | "
|
||||
f"{row['diff_from_target']:>10.0f} | {comment:<20}")
|
||||
|
||||
# Analysis
|
||||
print("\n" + "=" * 80)
|
||||
print("分析结论")
|
||||
print("=" * 80)
|
||||
|
||||
# Find best with good R²
|
||||
good_fit = results_df[results_df["r_squared"] >= 0.8]
|
||||
if len(good_fit) > 0:
|
||||
best_good = good_fit.iloc[0]
|
||||
print(f"\n在 R² ≥ 0.8 的条件下,最接近 K=3650 的窗口:")
|
||||
print(f" 起始年份: {int(best_good['start_year'])}")
|
||||
print(f" K = {best_good['K']:.0f}")
|
||||
print(f" R² = {best_good['r_squared']:.4f}")
|
||||
print(f" 差距: {best_good['diff_from_target']:.0f}")
|
||||
|
||||
# Summary
|
||||
print("\n关键发现:")
|
||||
print("-" * 40)
|
||||
|
||||
# Check if any good fit gives K near 3650
|
||||
near_target = results_df[(results_df["diff_from_target"] < 1000) & (results_df["r_squared"] >= 0.7)]
|
||||
|
||||
if len(near_target) == 0:
|
||||
print(" ⚠️ 没有任何数据窗口能在良好拟合(R²≥0.7)的条件下得到 K≈3650")
|
||||
print(" ⚠️ 所有良好拟合的窗口都给出 K >> 3650 或 K << 3650")
|
||||
print("\n 这说明:")
|
||||
print(" • K=3650 不是数据自然支持的结论")
|
||||
print(" • K=3650 来自物理约束,而非统计预测")
|
||||
print(" • 论文中应该明确说明这是'物理上限'而非'数据预测'")
|
||||
else:
|
||||
print(f" 找到 {len(near_target)} 个窗口使 K 接近 3650:")
|
||||
for _, row in near_target.iterrows():
|
||||
print(f" {int(row['start_year'])}-2025: K={row['K']:.0f}, R²={row['r_squared']:.4f}")
|
||||
|
||||
# Generate visualization
|
||||
print("\n" + "=" * 80)
|
||||
print("生成可视化...")
|
||||
print("=" * 80)
|
||||
|
||||
fig, axes = plt.subplots(1, 2, figsize=(14, 6))
|
||||
|
||||
# Plot 1: K vs Start Year
|
||||
ax1 = axes[0]
|
||||
colors = ['#27AE60' if r2 >= 0.8 else '#F39C12' if r2 >= 0.5 else '#E74C3C'
|
||||
for r2 in results_df["r_squared"]]
|
||||
|
||||
ax1.scatter(results_df["start_year"], results_df["K"], c=colors, s=60, alpha=0.7, edgecolor='black')
|
||||
ax1.axhline(3650, color='blue', ls='--', lw=2, label='Target K=3650')
|
||||
ax1.axhline(3650*0.9, color='blue', ls=':', lw=1, alpha=0.5)
|
||||
ax1.axhline(3650*1.1, color='blue', ls=':', lw=1, alpha=0.5)
|
||||
|
||||
ax1.set_xlabel("Starting Year", fontsize=11)
|
||||
ax1.set_ylabel("K (Carrying Capacity)", fontsize=11)
|
||||
ax1.set_title("K vs Starting Year\n(Color: Green=R²≥0.8, Yellow=R²≥0.5, Red=R²<0.5)", fontsize=12)
|
||||
ax1.legend()
|
||||
ax1.grid(True, alpha=0.3)
|
||||
ax1.set_xlim(1955, 2020)
|
||||
|
||||
# Plot 2: R² vs Start Year
|
||||
ax2 = axes[1]
|
||||
ax2.scatter(results_df["start_year"], results_df["r_squared"], c=colors, s=60, alpha=0.7, edgecolor='black')
|
||||
ax2.axhline(0.8, color='green', ls='--', lw=1.5, label='R²=0.8 (Good)')
|
||||
ax2.axhline(0.5, color='orange', ls=':', lw=1.5, label='R²=0.5 (Acceptable)')
|
||||
|
||||
ax2.set_xlabel("Starting Year", fontsize=11)
|
||||
ax2.set_ylabel("R² (Goodness of Fit)", fontsize=11)
|
||||
ax2.set_title("Model Fit Quality vs Starting Year", fontsize=12)
|
||||
ax2.legend()
|
||||
ax2.grid(True, alpha=0.3)
|
||||
ax2.set_xlim(1955, 2020)
|
||||
ax2.set_ylim(-0.1, 1.1)
|
||||
|
||||
plt.tight_layout()
|
||||
plt.savefig("find_optimal_window.png", dpi=150, bbox_inches='tight')
|
||||
plt.close()
|
||||
print("图表已保存: find_optimal_window.png")
|
||||
|
||||
# Save to CSV
|
||||
results_df.to_csv("window_enumeration.csv", index=False)
|
||||
print("数据已保存: window_enumeration.csv")
|
||||
|
||||
print("\n" + "=" * 80)
|
||||
print("分析完成!")
|
||||
print("=" * 80)
|
||||
|
||||
return results_df
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
results = main()
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Find Optimal Data Window for K ≈ 3650
|
||||
|
||||
Enumerate all possible starting years to find which data window
|
||||
produces K closest to the physical limit (3650 launches/year).
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from scipy.optimize import curve_fit
|
||||
import matplotlib
|
||||
matplotlib.use('Agg')
|
||||
import matplotlib.pyplot as plt
|
||||
import warnings
|
||||
warnings.filterwarnings('ignore')
|
||||
|
||||
plt.rcParams['font.sans-serif'] = ['Arial Unicode MS', 'SimHei', 'DejaVu Sans']
|
||||
plt.rcParams['axes.unicode_minus'] = False
|
||||
|
||||
|
||||
# Richards model
|
||||
def richards(t, K, r, t0, v):
|
||||
exp_term = np.exp(-r * (t - t0))
|
||||
exp_term = np.clip(exp_term, 1e-10, 1e10)
|
||||
return K / np.power(1 + exp_term, 1/v)
|
||||
|
||||
|
||||
def load_data(filepath="rocket_launch_counts.csv"):
|
||||
df = pd.read_csv(filepath)
|
||||
df = df.rename(columns={"YDate": "year", "Total": "launches"})
|
||||
df["year"] = pd.to_numeric(df["year"], errors="coerce")
|
||||
df["launches"] = pd.to_numeric(df["launches"], errors="coerce")
|
||||
df = df.dropna(subset=["year", "launches"])
|
||||
df = df[(df["year"] >= 1957) & (df["year"] <= 2025)]
|
||||
df = df.astype({"year": int, "launches": int})
|
||||
df = df.sort_values("year").reset_index(drop=True)
|
||||
return df
|
||||
|
||||
|
||||
def fit_richards(data, base_year=1957):
|
||||
"""Fit unconstrained Richards model"""
|
||||
years = data["year"].values
|
||||
launches = data["launches"].values
|
||||
t = (years - base_year).astype(float)
|
||||
|
||||
p0 = [5000.0, 0.08, 80.0, 2.0]
|
||||
bounds = ([500, 0.005, 10, 0.2], [100000, 1.0, 200, 10.0])
|
||||
|
||||
try:
|
||||
popt, pcov = curve_fit(richards, t, launches, p0=p0, bounds=bounds, maxfev=100000)
|
||||
y_pred = richards(t, *popt)
|
||||
ss_res = np.sum((launches - y_pred) ** 2)
|
||||
ss_tot = np.sum((launches - np.mean(launches)) ** 2)
|
||||
r_squared = 1 - (ss_res / ss_tot) if ss_tot > 0 else 0
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
"K": popt[0],
|
||||
"r": popt[1],
|
||||
"t0": popt[2],
|
||||
"v": popt[3],
|
||||
"r_squared": r_squared,
|
||||
"n_points": len(data),
|
||||
}
|
||||
except:
|
||||
return {"success": False}
|
||||
|
||||
|
||||
def main():
|
||||
print("=" * 80)
|
||||
print("枚举起始年份,寻找 K ≈ 3650 的数据窗口")
|
||||
print("=" * 80)
|
||||
|
||||
df = load_data()
|
||||
print(f"数据范围: {df['year'].min()} - {df['year'].max()}\n")
|
||||
|
||||
target_K = 3650
|
||||
end_year = 2025
|
||||
|
||||
# Enumerate all starting years
|
||||
results = []
|
||||
|
||||
for start_year in range(1957, 2020):
|
||||
data = df[(df["year"] >= start_year) & (df["year"] <= end_year)].copy()
|
||||
|
||||
if len(data) < 5: # Need at least 5 data points
|
||||
continue
|
||||
|
||||
result = fit_richards(data)
|
||||
|
||||
if result["success"]:
|
||||
diff = abs(result["K"] - target_K)
|
||||
results.append({
|
||||
"start_year": start_year,
|
||||
"end_year": end_year,
|
||||
"years_span": end_year - start_year + 1,
|
||||
"n_points": result["n_points"],
|
||||
"K": result["K"],
|
||||
"r_squared": result["r_squared"],
|
||||
"diff_from_target": diff,
|
||||
"ratio": result["K"] / target_K,
|
||||
})
|
||||
|
||||
# Sort by difference from target
|
||||
results_df = pd.DataFrame(results)
|
||||
results_df = results_df.sort_values("diff_from_target")
|
||||
|
||||
# Print full table
|
||||
print("完整枚举结果表(按与目标值 3650 的差距排序):")
|
||||
print("-" * 80)
|
||||
print(f"{'起始年份':>8} | {'时间跨度':>8} | {'数据点':>6} | {'K值':>10} | {'R²':>8} | {'K/3650':>8} | {'差距':>10}")
|
||||
print("-" * 80)
|
||||
|
||||
for _, row in results_df.iterrows():
|
||||
marker = "★" if row["diff_from_target"] < 500 else ""
|
||||
print(f"{int(row['start_year']):>8} | {int(row['years_span']):>6}年 | {int(row['n_points']):>6} | "
|
||||
f"{row['K']:>10.0f} | {row['r_squared']:>8.4f} | {row['ratio']:>8.2f}x | {row['diff_from_target']:>10.0f} {marker}")
|
||||
|
||||
# Find closest matches
|
||||
print("\n" + "=" * 80)
|
||||
print("最接近 K = 3650 的数据窗口 (Top 10)")
|
||||
print("=" * 80)
|
||||
|
||||
top10 = results_df.head(10)
|
||||
print(f"\n{'排名':>4} | {'起始年份':>8} | {'K值':>10} | {'R²':>8} | {'差距':>10} | {'评价':<20}")
|
||||
print("-" * 75)
|
||||
|
||||
for i, (_, row) in enumerate(top10.iterrows(), 1):
|
||||
if row["r_squared"] < 0.5:
|
||||
comment = "❌ 拟合差,不可信"
|
||||
elif row["r_squared"] < 0.8:
|
||||
comment = "⚠️ 拟合一般"
|
||||
else:
|
||||
comment = "✅ 拟合良好"
|
||||
|
||||
print(f"{i:>4} | {int(row['start_year']):>8} | {row['K']:>10.0f} | {row['r_squared']:>8.4f} | "
|
||||
f"{row['diff_from_target']:>10.0f} | {comment:<20}")
|
||||
|
||||
# Analysis
|
||||
print("\n" + "=" * 80)
|
||||
print("分析结论")
|
||||
print("=" * 80)
|
||||
|
||||
# Find best with good R²
|
||||
good_fit = results_df[results_df["r_squared"] >= 0.8]
|
||||
if len(good_fit) > 0:
|
||||
best_good = good_fit.iloc[0]
|
||||
print(f"\n在 R² ≥ 0.8 的条件下,最接近 K=3650 的窗口:")
|
||||
print(f" 起始年份: {int(best_good['start_year'])}")
|
||||
print(f" K = {best_good['K']:.0f}")
|
||||
print(f" R² = {best_good['r_squared']:.4f}")
|
||||
print(f" 差距: {best_good['diff_from_target']:.0f}")
|
||||
|
||||
# Summary
|
||||
print("\n关键发现:")
|
||||
print("-" * 40)
|
||||
|
||||
# Check if any good fit gives K near 3650
|
||||
near_target = results_df[(results_df["diff_from_target"] < 1000) & (results_df["r_squared"] >= 0.7)]
|
||||
|
||||
if len(near_target) == 0:
|
||||
print(" ⚠️ 没有任何数据窗口能在良好拟合(R²≥0.7)的条件下得到 K≈3650")
|
||||
print(" ⚠️ 所有良好拟合的窗口都给出 K >> 3650 或 K << 3650")
|
||||
print("\n 这说明:")
|
||||
print(" • K=3650 不是数据自然支持的结论")
|
||||
print(" • K=3650 来自物理约束,而非统计预测")
|
||||
print(" • 论文中应该明确说明这是'物理上限'而非'数据预测'")
|
||||
else:
|
||||
print(f" 找到 {len(near_target)} 个窗口使 K 接近 3650:")
|
||||
for _, row in near_target.iterrows():
|
||||
print(f" {int(row['start_year'])}-2025: K={row['K']:.0f}, R²={row['r_squared']:.4f}")
|
||||
|
||||
# Generate visualization
|
||||
print("\n" + "=" * 80)
|
||||
print("生成可视化...")
|
||||
print("=" * 80)
|
||||
|
||||
fig, axes = plt.subplots(1, 2, figsize=(14, 6))
|
||||
|
||||
# Plot 1: K vs Start Year
|
||||
ax1 = axes[0]
|
||||
colors = ['#27AE60' if r2 >= 0.8 else '#F39C12' if r2 >= 0.5 else '#E74C3C'
|
||||
for r2 in results_df["r_squared"]]
|
||||
|
||||
ax1.scatter(results_df["start_year"], results_df["K"], c=colors, s=60, alpha=0.7, edgecolor='black')
|
||||
ax1.axhline(3650, color='blue', ls='--', lw=2, label='Target K=3650')
|
||||
ax1.axhline(3650*0.9, color='blue', ls=':', lw=1, alpha=0.5)
|
||||
ax1.axhline(3650*1.1, color='blue', ls=':', lw=1, alpha=0.5)
|
||||
|
||||
ax1.set_xlabel("Starting Year", fontsize=11)
|
||||
ax1.set_ylabel("K (Carrying Capacity)", fontsize=11)
|
||||
ax1.set_title("K vs Starting Year\n(Color: Green=R²≥0.8, Yellow=R²≥0.5, Red=R²<0.5)", fontsize=12)
|
||||
ax1.legend()
|
||||
ax1.grid(True, alpha=0.3)
|
||||
ax1.set_xlim(1955, 2020)
|
||||
|
||||
# Plot 2: R² vs Start Year
|
||||
ax2 = axes[1]
|
||||
ax2.scatter(results_df["start_year"], results_df["r_squared"], c=colors, s=60, alpha=0.7, edgecolor='black')
|
||||
ax2.axhline(0.8, color='green', ls='--', lw=1.5, label='R²=0.8 (Good)')
|
||||
ax2.axhline(0.5, color='orange', ls=':', lw=1.5, label='R²=0.5 (Acceptable)')
|
||||
|
||||
ax2.set_xlabel("Starting Year", fontsize=11)
|
||||
ax2.set_ylabel("R² (Goodness of Fit)", fontsize=11)
|
||||
ax2.set_title("Model Fit Quality vs Starting Year", fontsize=12)
|
||||
ax2.legend()
|
||||
ax2.grid(True, alpha=0.3)
|
||||
ax2.set_xlim(1955, 2020)
|
||||
ax2.set_ylim(-0.1, 1.1)
|
||||
|
||||
plt.tight_layout()
|
||||
plt.savefig("find_optimal_window.png", dpi=150, bbox_inches='tight')
|
||||
plt.close()
|
||||
print("图表已保存: find_optimal_window.png")
|
||||
|
||||
# Save to CSV
|
||||
results_df.to_csv("window_enumeration.csv", index=False)
|
||||
print("数据已保存: window_enumeration.csv")
|
||||
|
||||
print("\n" + "=" * 80)
|
||||
print("分析完成!")
|
||||
print("=" * 80)
|
||||
|
||||
return results_df
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
results = main()
|
||||
|
||||
Reference in New Issue
Block a user