diff --git a/task3/07_sensitivity.py b/task3/07_sensitivity.py index 9e25e28..b96f00e 100644 --- a/task3/07_sensitivity.py +++ b/task3/07_sensitivity.py @@ -1,24 +1,35 @@ """ -Task 3 - Step 7: 敏感性分析 -============================ +Task 3 - Step 7: 敏感性分析 (Refined) +===================================== -分析以下参数对模型输出的影响: -1. 合并比例 r_merge: [1/3, 1/2, 2/3] -2. 距离阈值 l_max: [30, 40, 50, 60, 70] -3. 容量上限 μ_sum_max: [400, 425, 450, 475, 500] -4. CV阈值: [0.3, 0.4, 0.5, 0.6] +分析以下参数对模型输出的影响 (高分辨率扫描): +1. 合并比例 r_merge: 0.1 ~ 0.9 (步长 0.01) +2. 距离阈值 l_max: 10 ~ 100 miles (步长 1) +3. 容量上限 μ_sum_max: 350 ~ 550 (步长 5) +4. CV阈值: 0.1 ~ 1.0 (步长 0.01) -输出: 07_sensitivity.xlsx (各参数对E1', E2', F1', R1的影响) +输出: +- 07_sensitivity.xlsx (详细数据) +- figures/fig3_sensitivity.png (敏感性曲线图) """ import pandas as pd import numpy as np from scipy import stats +import matplotlib.pyplot as plt import warnings +import os +from pathlib import Path + warnings.filterwarnings('ignore') +# 设置绘图风格 +plt.style.use('seaborn-v0_8-whitegrid') +plt.rcParams['font.sans-serif'] = ['Arial Unicode MS', 'SimHei', 'DejaVu Sans'] # 适配中文 +plt.rcParams['axes.unicode_minus'] = False + # ============================================ -# 基础参数和函数 +# 基础参数和函数 (保持不变) # ============================================ Q = 400 QUALITY_THRESHOLD = 250 @@ -61,10 +72,23 @@ def calc_distance(lat1, lon1, lat2, lon2): delta_lon = lon1 - lon2 return 69.0 * np.sqrt(delta_lat**2 + (np.cos(lat_avg_rad) * delta_lon)**2) +def compute_distance_matrix(sites_df: pd.DataFrame) -> np.ndarray: + lat = sites_df['lat'].to_numpy(dtype=float) + lon = sites_df['lon'].to_numpy(dtype=float) + + lat_avg = (lat[:, None] + lat[None, :]) / 2.0 + lat_avg_rad = np.radians(lat_avg) + delta_lat = lat[:, None] - lat[None, :] + delta_lon = lon[:, None] - lon[None, :] + + dist = 69.0 * np.sqrt(delta_lat**2 + (np.cos(lat_avg_rad) * delta_lon)**2) + np.fill_diagonal(dist, 0.0) + return dist + # ============================================ # 完整流水线函数 # ============================================ -def run_pipeline(sites_df, l_max=50, mu_sum_max=450, cv_max=0.5, merge_ratio=0.5): +def run_pipeline(sites_df, distance_matrix, l_max=50, mu_sum_max=450, cv_max=0.5, merge_ratio=0.5): """ 运行完整的Task 3流水线,返回评估指标 """ @@ -72,23 +96,15 @@ def run_pipeline(sites_df, l_max=50, mu_sum_max=450, cv_max=0.5, merge_ratio=0.5 sites['cv'] = sites['sigma'] / sites['mu'] n = len(sites) - # Step 1: 计算距离矩阵 - distance_matrix = np.zeros((n, n)) - for i in range(n): - for j in range(n): - if i != j: - distance_matrix[i, j] = calc_distance( - sites.iloc[i]['lat'], sites.iloc[i]['lon'], - sites.iloc[j]['lat'], sites.iloc[j]['lon'] - ) - # Step 2: 配对筛选 candidates = [] + for i in range(n): for j in range(i + 1, n): site_i = sites.iloc[i] site_j = sites.iloc[j] - dist = distance_matrix[i, j] + + dist = float(distance_matrix[i, j]) if dist > l_max: continue @@ -113,7 +129,6 @@ def run_pipeline(sites_df, l_max=50, mu_sum_max=450, cv_max=0.5, merge_ratio=0.5 }) if len(candidates) == 0: - # 无可行配对,返回Task 1的指标 E1 = (sites['k'] * sites['mu']).sum() E2 = sum(sites['k'] * sites['mu'].apply(quality_factor) * sites['mu']) rates = [row['k'] * row['mu'] / row['mu_tilde'] for _, row in sites.iterrows()] @@ -145,8 +160,6 @@ def run_pipeline(sites_df, l_max=50, mu_sum_max=450, cv_max=0.5, merge_ratio=0.5 # Step 4: 重分配访问次数 sites['k_single'] = sites['k'].copy() - sites['k_dual'] = 0 - pair_k = {} for pair in selected: k_i, k_j = pair['k_i'], pair['k_j'] @@ -158,9 +171,7 @@ def run_pipeline(sites_df, l_max=50, mu_sum_max=450, cv_max=0.5, merge_ratio=0.5 idx_i, idx_j = pair['idx_i'], pair['idx_j'] sites.loc[idx_i, 'k_single'] = k_i - k_ij - sites.loc[idx_i, 'k_dual'] = k_ij sites.loc[idx_j, 'k_single'] = k_j - k_ij - sites.loc[idx_j, 'k_dual'] = k_ij pair_k[(pair['site_i_id'], pair['site_j_id'])] = k_ij # 计算释放槽位并重分配 @@ -181,13 +192,11 @@ def run_pipeline(sites_df, l_max=50, mu_sum_max=450, cv_max=0.5, merge_ratio=0.5 sites['k_single_final'] = sites['k_single'] # Step 5: 计算指标 - # E1' E1 = (sites['k_single_final'] * sites['mu']).sum() for pair in selected: k_ij = pair_k.get((pair['site_i_id'], pair['site_j_id']), 0) E1 += k_ij * pair['E_total'] - # E2' E2 = sum(sites['k_single_final'] * sites['mu'].apply(quality_factor) * sites['mu']) for pair in selected: k_ij = pair_k.get((pair['site_i_id'], pair['site_j_id']), 0) @@ -195,7 +204,6 @@ def run_pipeline(sites_df, l_max=50, mu_sum_max=450, cv_max=0.5, merge_ratio=0.5 q_factor = quality_factor(mu_sum) E2 += k_ij * q_factor * pair['E_total'] - # 满足率 site_satisfaction = {} for idx, row in sites.iterrows(): r = row['k_single_final'] * row['mu'] / row['mu_tilde'] if row['mu_tilde'] > 0 else 0 @@ -212,7 +220,6 @@ def run_pipeline(sites_df, l_max=50, mu_sum_max=450, cv_max=0.5, merge_ratio=0.5 F1 = gini_coefficient(rates) F2 = min(rates) if rates else 0 - # R1: 缺口风险 shortfall_probs = [] for pair in selected: q = pair['q_final'] @@ -221,7 +228,6 @@ def run_pipeline(sites_df, l_max=50, mu_sum_max=450, cv_max=0.5, merge_ratio=0.5 shortfall_probs.append(1 - (1 - p_i) * (1 - p_j)) R1 = np.mean(shortfall_probs) if shortfall_probs else 0 - # RS: 资源节省率 RS = total_dual / 730 return { @@ -234,12 +240,21 @@ def run_pipeline(sites_df, l_max=50, mu_sum_max=450, cv_max=0.5, merge_ratio=0.5 # 主程序 # ============================================ print("=" * 60) -print("Task 3 - Step 7: 敏感性分析") +print("Task 3 - Step 7: 敏感性分析 (High Res)") print("=" * 60) # 读取基础数据 -sites_df = pd.read_excel('../task1/03_allocate.xlsx') -print(f"\n读取站点数据: {len(sites_df)} 个站点") +BASE_DIR = Path(__file__).resolve().parent +SITES_PATH = BASE_DIR.parent / 'task1' / '03_allocate.xlsx' +OUTPUT_XLSX = BASE_DIR / '07_sensitivity.xlsx' +OUTPUT_FIG = BASE_DIR / 'figures' / 'fig3_sensitivity.png' +OUTPUT_FIG.parent.mkdir(parents=True, exist_ok=True) + +sites_df = pd.read_excel(SITES_PATH) +print(f"站点数据: {len(sites_df)} 个") + +# 预计算距离矩阵(高分辨率扫描时显著加速) +distance_matrix = compute_distance_matrix(sites_df) # 基准参数 BASE_L_MAX = 50 @@ -248,153 +263,130 @@ BASE_CV_MAX = 0.5 BASE_MERGE_RATIO = 0.5 # 计算基准结果 -print(f"\n计算基准结果...") -base_result = run_pipeline(sites_df, BASE_L_MAX, BASE_MU_SUM_MAX, BASE_CV_MAX, BASE_MERGE_RATIO) -print(f"基准: E1={base_result['E1']:.0f}, E2={base_result['E2']:.0f}, F1={base_result['F1']:.4f}, R1={base_result['R1']:.4f}") +base_result = run_pipeline(sites_df, distance_matrix, BASE_L_MAX, BASE_MU_SUM_MAX, BASE_CV_MAX, BASE_MERGE_RATIO) +print(f"基准结果: E1={base_result['E1']:.0f}, R1={base_result['R1']:.4f}") + +# 定义扫描参数 +params_config = { + 'merge_ratio': np.round(np.arange(0.10, 0.90 + 1e-9, 0.01), 2), + 'l_max': np.arange(10, 100 + 1e-9, 1, dtype=float), + 'mu_sum_max': np.arange(350, 550 + 1e-9, 5, dtype=float), + 'cv_max': np.round(np.arange(0.10, 1.00 + 1e-9, 0.01), 2), +} -# ============================================ -# 敏感性分析 -# ============================================ all_results = [] -# 1. 合并比例敏感性 -print(f"\n" + "-" * 40) -print("1. 合并比例敏感性 (merge_ratio)") -print("-" * 40) +# 执行扫描 +print("\n开始参数扫描...") -merge_ratios = [1/3, 0.5, 2/3] -for mr in merge_ratios: - result = run_pipeline(sites_df, BASE_L_MAX, BASE_MU_SUM_MAX, BASE_CV_MAX, mr) - result['param'] = 'merge_ratio' - result['param_value'] = mr - all_results.append(result) - print(f" merge_ratio={mr:.3f}: pairs={result['num_pairs']}, dual={result['num_dual_visits']}, " - f"E1={result['E1']:.0f}, E2={result['E2']:.0f}, F1={result['F1']:.4f}, R1={result['R1']:.4f}") +# 1. Merge Ratio +print("Scanning Merge Ratio...") +for val in params_config['merge_ratio']: + res = run_pipeline(sites_df, distance_matrix, BASE_L_MAX, BASE_MU_SUM_MAX, BASE_CV_MAX, val) + res.update({'param': 'merge_ratio', 'value': val}) + all_results.append(res) -# 2. 距离阈值敏感性 -print(f"\n" + "-" * 40) -print("2. 距离阈值敏感性 (l_max)") -print("-" * 40) +# 2. Distance Threshold +print("Scanning Distance Threshold...") +for val in params_config['l_max']: + res = run_pipeline(sites_df, distance_matrix, val, BASE_MU_SUM_MAX, BASE_CV_MAX, BASE_MERGE_RATIO) + res.update({'param': 'l_max', 'value': val}) + all_results.append(res) -l_max_values = [30, 40, 50, 60, 70] -for lm in l_max_values: - result = run_pipeline(sites_df, lm, BASE_MU_SUM_MAX, BASE_CV_MAX, BASE_MERGE_RATIO) - result['param'] = 'l_max' - result['param_value'] = lm - all_results.append(result) - print(f" l_max={lm}: pairs={result['num_pairs']}, dual={result['num_dual_visits']}, " - f"E1={result['E1']:.0f}, E2={result['E2']:.0f}, F1={result['F1']:.4f}, R1={result['R1']:.4f}") +# 3. Capacity Cap +print("Scanning Capacity Cap...") +for val in params_config['mu_sum_max']: + res = run_pipeline(sites_df, distance_matrix, BASE_L_MAX, val, BASE_CV_MAX, BASE_MERGE_RATIO) + res.update({'param': 'mu_sum_max', 'value': val}) + all_results.append(res) -# 3. 容量上限敏感性 -print(f"\n" + "-" * 40) -print("3. 容量上限敏感性 (mu_sum_max)") -print("-" * 40) +# 4. CV Threshold +print("Scanning CV Threshold...") +for val in params_config['cv_max']: + res = run_pipeline(sites_df, distance_matrix, BASE_L_MAX, BASE_MU_SUM_MAX, val, BASE_MERGE_RATIO) + res.update({'param': 'cv_max', 'value': val}) + all_results.append(res) -mu_sum_values = [400, 425, 450, 475, 500] -for ms in mu_sum_values: - result = run_pipeline(sites_df, BASE_L_MAX, ms, BASE_CV_MAX, BASE_MERGE_RATIO) - result['param'] = 'mu_sum_max' - result['param_value'] = ms - all_results.append(result) - print(f" mu_sum_max={ms}: pairs={result['num_pairs']}, dual={result['num_dual_visits']}, " - f"E1={result['E1']:.0f}, E2={result['E2']:.0f}, F1={result['F1']:.4f}, R1={result['R1']:.4f}") - -# 4. CV阈值敏感性 -print(f"\n" + "-" * 40) -print("4. CV阈值敏感性 (cv_max)") -print("-" * 40) - -cv_max_values = [0.3, 0.4, 0.5, 0.6] -for cv in cv_max_values: - result = run_pipeline(sites_df, BASE_L_MAX, BASE_MU_SUM_MAX, cv, BASE_MERGE_RATIO) - result['param'] = 'cv_max' - result['param_value'] = cv - all_results.append(result) - print(f" cv_max={cv}: pairs={result['num_pairs']}, dual={result['num_dual_visits']}, " - f"E1={result['E1']:.0f}, E2={result['E2']:.0f}, F1={result['F1']:.4f}, R1={result['R1']:.4f}") - -# ============================================ -# 汇总分析 -# ============================================ +# 转换为DataFrame df_results = pd.DataFrame(all_results) -print(f"\n" + "=" * 60) -print("敏感性分析汇总") -print("=" * 60) +# ============================================ +# 绘图 +# ============================================ +print("\n绘制敏感性曲线...") -# 计算各参数的影响范围 -for param in ['merge_ratio', 'l_max', 'mu_sum_max', 'cv_max']: - subset = df_results[df_results['param'] == param] - print(f"\n{param}:") - print(f" E1 变化范围: [{subset['E1'].min():.0f}, {subset['E1'].max():.0f}], " - f"变化幅度: {(subset['E1'].max() - subset['E1'].min()) / base_result['E1'] * 100:.2f}%") - print(f" E2 变化范围: [{subset['E2'].min():.0f}, {subset['E2'].max():.0f}], " - f"变化幅度: {(subset['E2'].max() - subset['E2'].min()) / base_result['E2'] * 100:.2f}%") - print(f" F1 变化范围: [{subset['F1'].min():.4f}, {subset['F1'].max():.4f}]") - print(f" R1 变化范围: [{subset['R1'].min():.4f}, {subset['R1'].max():.4f}]") +fig, axes = plt.subplots(2, 2, figsize=(15, 12)) +fig.suptitle('Task 3 Sensitivity Analysis', fontsize=16, fontweight='bold') + +# 参数对应的中文标签和单位 +param_labels = { + 'merge_ratio': ('Merge Ratio', 'Ratio'), + 'l_max': ('Distance Threshold (l_max)', 'Miles'), + 'mu_sum_max': ('Capacity Cap (μ_sum)', 'lbs (proxy)'), + 'cv_max': ('CV Threshold', 'CV') +} + +# 绘图循环 +params_list = ['merge_ratio', 'l_max', 'mu_sum_max', 'cv_max'] +for i, param in enumerate(params_list): + ax = axes[i // 2, i % 2] + data = df_results[df_results['param'] == param].sort_values('value') + + # 绘制 E1/E2 - 左轴 + line1, = ax.plot(data['value'], data['E1'], 'b-', linewidth=1.6, label='Expected Service (E1)') + line2, = ax.plot(data['value'], data['E2'], 'g-', linewidth=1.6, alpha=0.9, label='Quality-weighted (E2)') + ax.set_xlabel(param_labels[param][0]) + ax.set_ylabel('Service (E1/E2)') + + # 绘制 R1 (风险) - 右轴 + ax2 = ax.twinx() + line3, = ax2.plot(data['value'], data['R1'], 'r--', linewidth=1.6, label='Shortfall Risk (R1)') + ax2.set_ylabel('Risk Probability (R1)', color='r') + ax2.tick_params(axis='y', labelcolor='r') + + # 添加基准线 + if param == 'merge_ratio': + base_val = BASE_MERGE_RATIO + elif param == 'l_max': + base_val = BASE_L_MAX + elif param == 'mu_sum_max': + base_val = BASE_MU_SUM_MAX + elif param == 'cv_max': + base_val = BASE_CV_MAX + + ax.axvline(x=base_val, color='gray', linestyle=':', alpha=0.5) + + # 图例 + lines = [line1, line2, line3] + labels = [l.get_label() for l in lines] + ax.legend(lines, labels, loc='best', frameon=True) + + ax.set_title(f'Effect of {param_labels[param][0]}') + ax.grid(True, alpha=0.3) + +plt.tight_layout(rect=[0, 0.03, 1, 0.95]) +plt.savefig(OUTPUT_FIG, dpi=300) +print(f"图表已保存至: {OUTPUT_FIG}") # ============================================ -# 保存结果 +# 保存详细数据 # ============================================ -OUTPUT_FILE = '07_sensitivity.xlsx' - -with pd.ExcelWriter(OUTPUT_FILE, engine='openpyxl') as writer: - # Sheet 1: 所有结果 - df_results.to_excel(writer, sheet_name='all_results', index=False) - - # Sheet 2: 合并比例敏感性 - df_merge = df_results[df_results['param'] == 'merge_ratio'].copy() - df_merge.to_excel(writer, sheet_name='merge_ratio', index=False) - - # Sheet 3: 距离阈值敏感性 - df_lmax = df_results[df_results['param'] == 'l_max'].copy() - df_lmax.to_excel(writer, sheet_name='l_max', index=False) - - # Sheet 4: 容量上限敏感性 - df_musum = df_results[df_results['param'] == 'mu_sum_max'].copy() - df_musum.to_excel(writer, sheet_name='mu_sum_max', index=False) - - # Sheet 5: CV阈值敏感性 - df_cv = df_results[df_results['param'] == 'cv_max'].copy() - df_cv.to_excel(writer, sheet_name='cv_max', index=False) - - # Sheet 6: 基准结果 - base_df = pd.DataFrame([{ - 'param': 'baseline', - 'l_max': BASE_L_MAX, - 'mu_sum_max': BASE_MU_SUM_MAX, - 'cv_max': BASE_CV_MAX, - 'merge_ratio': BASE_MERGE_RATIO, - **base_result - }]) - base_df.to_excel(writer, sheet_name='baseline', index=False) - - # Sheet 7: 汇总统计 - summary_rows = [] - for param in ['merge_ratio', 'l_max', 'mu_sum_max', 'cv_max']: - subset = df_results[df_results['param'] == param] - summary_rows.append({ - 'param': param, - 'E1_min': subset['E1'].min(), - 'E1_max': subset['E1'].max(), - 'E1_range_pct': (subset['E1'].max() - subset['E1'].min()) / base_result['E1'] * 100, - 'E2_min': subset['E2'].min(), - 'E2_max': subset['E2'].max(), - 'E2_range_pct': (subset['E2'].max() - subset['E2'].min()) / base_result['E2'] * 100, - 'F1_min': subset['F1'].min(), - 'F1_max': subset['F1'].max(), - 'R1_min': subset['R1'].min(), - 'R1_max': subset['R1'].max() +with pd.ExcelWriter(OUTPUT_XLSX, engine='openpyxl') as writer: + df_results.to_excel(writer, sheet_name='detailed_data', index=False) + + # 摘要统计 + summary = [] + for param in params_list: + sub = df_results[df_results['param'] == param] + summary.append({ + 'Parameter': param, + 'E1_Range': f"{sub['E1'].min():.0f} - {sub['E1'].max():.0f}", + 'E1_Var%': (sub['E1'].max() - sub['E1'].min()) / base_result['E1'] * 100, + 'E2_Range': f"{sub['E2'].min():.0f} - {sub['E2'].max():.0f}", + 'E2_Var%': (sub['E2'].max() - sub['E2'].min()) / base_result['E2'] * 100, + 'R1_Range': f"{sub['R1'].min():.4f} - {sub['R1'].max():.4f}" }) - df_summary = pd.DataFrame(summary_rows) - df_summary.to_excel(writer, sheet_name='summary', index=False) + pd.DataFrame(summary).to_excel(writer, sheet_name='summary', index=False) -print(f"\n结果已保存至: {OUTPUT_FILE}") -print(" - Sheet 'all_results': 所有结果") -print(" - Sheet 'merge_ratio': 合并比例敏感性") -print(" - Sheet 'l_max': 距离阈值敏感性") -print(" - Sheet 'mu_sum_max': 容量上限敏感性") -print(" - Sheet 'cv_max': CV阈值敏感性") -print(" - Sheet 'baseline': 基准结果") -print(" - Sheet 'summary': 汇总统计") -print("\n" + "=" * 60) +print(f"数据已保存至: {OUTPUT_XLSX}") +print("完成!") diff --git a/task3/07_sensitivity.xlsx b/task3/07_sensitivity.xlsx index efcfccf..ad10fdd 100644 Binary files a/task3/07_sensitivity.xlsx and b/task3/07_sensitivity.xlsx differ diff --git a/task3/figures/fig3_sensitivity.png b/task3/figures/fig3_sensitivity.png new file mode 100644 index 0000000..774dd75 Binary files /dev/null and b/task3/figures/fig3_sensitivity.png differ