P3: sens figure

This commit is contained in:
2026-01-19 14:15:51 +08:00
parent d4d7f7d94b
commit 73ae99885a
3 changed files with 160 additions and 168 deletions

View File

@@ -1,24 +1,35 @@
""" """
Task 3 - Step 7: 敏感性分析 Task 3 - Step 7: 敏感性分析 (Refined)
============================ =====================================
分析以下参数对模型输出的影响 分析以下参数对模型输出的影响 (高分辨率扫描):
1. 合并比例 r_merge: [1/3, 1/2, 2/3] 1. 合并比例 r_merge: 0.1 ~ 0.9 (步长 0.01)
2. 距离阈值 l_max: [30, 40, 50, 60, 70] 2. 距离阈值 l_max: 10 ~ 100 miles (步长 1)
3. 容量上限 μ_sum_max: [400, 425, 450, 475, 500] 3. 容量上限 μ_sum_max: 350 ~ 550 (步长 5)
4. CV阈值: [0.3, 0.4, 0.5, 0.6] 4. CV阈值: 0.1 ~ 1.0 (步长 0.01)
输出: 07_sensitivity.xlsx (各参数对E1', E2', F1', R1的影响) 输出:
- 07_sensitivity.xlsx (详细数据)
- figures/fig3_sensitivity.png (敏感性曲线图)
""" """
import pandas as pd import pandas as pd
import numpy as np import numpy as np
from scipy import stats from scipy import stats
import matplotlib.pyplot as plt
import warnings import warnings
import os
from pathlib import Path
warnings.filterwarnings('ignore') warnings.filterwarnings('ignore')
# 设置绘图风格
plt.style.use('seaborn-v0_8-whitegrid')
plt.rcParams['font.sans-serif'] = ['Arial Unicode MS', 'SimHei', 'DejaVu Sans'] # 适配中文
plt.rcParams['axes.unicode_minus'] = False
# ============================================ # ============================================
# 基础参数和函数 # 基础参数和函数 (保持不变)
# ============================================ # ============================================
Q = 400 Q = 400
QUALITY_THRESHOLD = 250 QUALITY_THRESHOLD = 250
@@ -61,10 +72,23 @@ def calc_distance(lat1, lon1, lat2, lon2):
delta_lon = lon1 - lon2 delta_lon = lon1 - lon2
return 69.0 * np.sqrt(delta_lat**2 + (np.cos(lat_avg_rad) * delta_lon)**2) return 69.0 * np.sqrt(delta_lat**2 + (np.cos(lat_avg_rad) * delta_lon)**2)
def compute_distance_matrix(sites_df: pd.DataFrame) -> np.ndarray:
lat = sites_df['lat'].to_numpy(dtype=float)
lon = sites_df['lon'].to_numpy(dtype=float)
lat_avg = (lat[:, None] + lat[None, :]) / 2.0
lat_avg_rad = np.radians(lat_avg)
delta_lat = lat[:, None] - lat[None, :]
delta_lon = lon[:, None] - lon[None, :]
dist = 69.0 * np.sqrt(delta_lat**2 + (np.cos(lat_avg_rad) * delta_lon)**2)
np.fill_diagonal(dist, 0.0)
return dist
# ============================================ # ============================================
# 完整流水线函数 # 完整流水线函数
# ============================================ # ============================================
def run_pipeline(sites_df, l_max=50, mu_sum_max=450, cv_max=0.5, merge_ratio=0.5): def run_pipeline(sites_df, distance_matrix, l_max=50, mu_sum_max=450, cv_max=0.5, merge_ratio=0.5):
""" """
运行完整的Task 3流水线返回评估指标 运行完整的Task 3流水线返回评估指标
""" """
@@ -72,23 +96,15 @@ def run_pipeline(sites_df, l_max=50, mu_sum_max=450, cv_max=0.5, merge_ratio=0.5
sites['cv'] = sites['sigma'] / sites['mu'] sites['cv'] = sites['sigma'] / sites['mu']
n = len(sites) n = len(sites)
# Step 1: 计算距离矩阵
distance_matrix = np.zeros((n, n))
for i in range(n):
for j in range(n):
if i != j:
distance_matrix[i, j] = calc_distance(
sites.iloc[i]['lat'], sites.iloc[i]['lon'],
sites.iloc[j]['lat'], sites.iloc[j]['lon']
)
# Step 2: 配对筛选 # Step 2: 配对筛选
candidates = [] candidates = []
for i in range(n): for i in range(n):
for j in range(i + 1, n): for j in range(i + 1, n):
site_i = sites.iloc[i] site_i = sites.iloc[i]
site_j = sites.iloc[j] site_j = sites.iloc[j]
dist = distance_matrix[i, j]
dist = float(distance_matrix[i, j])
if dist > l_max: if dist > l_max:
continue continue
@@ -113,7 +129,6 @@ def run_pipeline(sites_df, l_max=50, mu_sum_max=450, cv_max=0.5, merge_ratio=0.5
}) })
if len(candidates) == 0: if len(candidates) == 0:
# 无可行配对返回Task 1的指标
E1 = (sites['k'] * sites['mu']).sum() E1 = (sites['k'] * sites['mu']).sum()
E2 = sum(sites['k'] * sites['mu'].apply(quality_factor) * sites['mu']) E2 = sum(sites['k'] * sites['mu'].apply(quality_factor) * sites['mu'])
rates = [row['k'] * row['mu'] / row['mu_tilde'] for _, row in sites.iterrows()] rates = [row['k'] * row['mu'] / row['mu_tilde'] for _, row in sites.iterrows()]
@@ -145,8 +160,6 @@ def run_pipeline(sites_df, l_max=50, mu_sum_max=450, cv_max=0.5, merge_ratio=0.5
# Step 4: 重分配访问次数 # Step 4: 重分配访问次数
sites['k_single'] = sites['k'].copy() sites['k_single'] = sites['k'].copy()
sites['k_dual'] = 0
pair_k = {} pair_k = {}
for pair in selected: for pair in selected:
k_i, k_j = pair['k_i'], pair['k_j'] k_i, k_j = pair['k_i'], pair['k_j']
@@ -158,9 +171,7 @@ def run_pipeline(sites_df, l_max=50, mu_sum_max=450, cv_max=0.5, merge_ratio=0.5
idx_i, idx_j = pair['idx_i'], pair['idx_j'] idx_i, idx_j = pair['idx_i'], pair['idx_j']
sites.loc[idx_i, 'k_single'] = k_i - k_ij sites.loc[idx_i, 'k_single'] = k_i - k_ij
sites.loc[idx_i, 'k_dual'] = k_ij
sites.loc[idx_j, 'k_single'] = k_j - k_ij sites.loc[idx_j, 'k_single'] = k_j - k_ij
sites.loc[idx_j, 'k_dual'] = k_ij
pair_k[(pair['site_i_id'], pair['site_j_id'])] = k_ij pair_k[(pair['site_i_id'], pair['site_j_id'])] = k_ij
# 计算释放槽位并重分配 # 计算释放槽位并重分配
@@ -181,13 +192,11 @@ def run_pipeline(sites_df, l_max=50, mu_sum_max=450, cv_max=0.5, merge_ratio=0.5
sites['k_single_final'] = sites['k_single'] sites['k_single_final'] = sites['k_single']
# Step 5: 计算指标 # Step 5: 计算指标
# E1'
E1 = (sites['k_single_final'] * sites['mu']).sum() E1 = (sites['k_single_final'] * sites['mu']).sum()
for pair in selected: for pair in selected:
k_ij = pair_k.get((pair['site_i_id'], pair['site_j_id']), 0) k_ij = pair_k.get((pair['site_i_id'], pair['site_j_id']), 0)
E1 += k_ij * pair['E_total'] E1 += k_ij * pair['E_total']
# E2'
E2 = sum(sites['k_single_final'] * sites['mu'].apply(quality_factor) * sites['mu']) E2 = sum(sites['k_single_final'] * sites['mu'].apply(quality_factor) * sites['mu'])
for pair in selected: for pair in selected:
k_ij = pair_k.get((pair['site_i_id'], pair['site_j_id']), 0) k_ij = pair_k.get((pair['site_i_id'], pair['site_j_id']), 0)
@@ -195,7 +204,6 @@ def run_pipeline(sites_df, l_max=50, mu_sum_max=450, cv_max=0.5, merge_ratio=0.5
q_factor = quality_factor(mu_sum) q_factor = quality_factor(mu_sum)
E2 += k_ij * q_factor * pair['E_total'] E2 += k_ij * q_factor * pair['E_total']
# 满足率
site_satisfaction = {} site_satisfaction = {}
for idx, row in sites.iterrows(): for idx, row in sites.iterrows():
r = row['k_single_final'] * row['mu'] / row['mu_tilde'] if row['mu_tilde'] > 0 else 0 r = row['k_single_final'] * row['mu'] / row['mu_tilde'] if row['mu_tilde'] > 0 else 0
@@ -212,7 +220,6 @@ def run_pipeline(sites_df, l_max=50, mu_sum_max=450, cv_max=0.5, merge_ratio=0.5
F1 = gini_coefficient(rates) F1 = gini_coefficient(rates)
F2 = min(rates) if rates else 0 F2 = min(rates) if rates else 0
# R1: 缺口风险
shortfall_probs = [] shortfall_probs = []
for pair in selected: for pair in selected:
q = pair['q_final'] q = pair['q_final']
@@ -221,7 +228,6 @@ def run_pipeline(sites_df, l_max=50, mu_sum_max=450, cv_max=0.5, merge_ratio=0.5
shortfall_probs.append(1 - (1 - p_i) * (1 - p_j)) shortfall_probs.append(1 - (1 - p_i) * (1 - p_j))
R1 = np.mean(shortfall_probs) if shortfall_probs else 0 R1 = np.mean(shortfall_probs) if shortfall_probs else 0
# RS: 资源节省率
RS = total_dual / 730 RS = total_dual / 730
return { return {
@@ -234,12 +240,21 @@ def run_pipeline(sites_df, l_max=50, mu_sum_max=450, cv_max=0.5, merge_ratio=0.5
# 主程序 # 主程序
# ============================================ # ============================================
print("=" * 60) print("=" * 60)
print("Task 3 - Step 7: 敏感性分析") print("Task 3 - Step 7: 敏感性分析 (High Res)")
print("=" * 60) print("=" * 60)
# 读取基础数据 # 读取基础数据
sites_df = pd.read_excel('../task1/03_allocate.xlsx') BASE_DIR = Path(__file__).resolve().parent
print(f"\n读取站点数据: {len(sites_df)} 个站点") SITES_PATH = BASE_DIR.parent / 'task1' / '03_allocate.xlsx'
OUTPUT_XLSX = BASE_DIR / '07_sensitivity.xlsx'
OUTPUT_FIG = BASE_DIR / 'figures' / 'fig3_sensitivity.png'
OUTPUT_FIG.parent.mkdir(parents=True, exist_ok=True)
sites_df = pd.read_excel(SITES_PATH)
print(f"站点数据: {len(sites_df)}")
# 预计算距离矩阵(高分辨率扫描时显著加速)
distance_matrix = compute_distance_matrix(sites_df)
# 基准参数 # 基准参数
BASE_L_MAX = 50 BASE_L_MAX = 50
@@ -248,153 +263,130 @@ BASE_CV_MAX = 0.5
BASE_MERGE_RATIO = 0.5 BASE_MERGE_RATIO = 0.5
# 计算基准结果 # 计算基准结果
print(f"\n计算基准结果...") base_result = run_pipeline(sites_df, distance_matrix, BASE_L_MAX, BASE_MU_SUM_MAX, BASE_CV_MAX, BASE_MERGE_RATIO)
base_result = run_pipeline(sites_df, BASE_L_MAX, BASE_MU_SUM_MAX, BASE_CV_MAX, BASE_MERGE_RATIO) print(f"基准结果: E1={base_result['E1']:.0f}, R1={base_result['R1']:.4f}")
print(f"基准: E1={base_result['E1']:.0f}, E2={base_result['E2']:.0f}, F1={base_result['F1']:.4f}, R1={base_result['R1']:.4f}")
# 定义扫描参数
params_config = {
'merge_ratio': np.round(np.arange(0.10, 0.90 + 1e-9, 0.01), 2),
'l_max': np.arange(10, 100 + 1e-9, 1, dtype=float),
'mu_sum_max': np.arange(350, 550 + 1e-9, 5, dtype=float),
'cv_max': np.round(np.arange(0.10, 1.00 + 1e-9, 0.01), 2),
}
# ============================================
# 敏感性分析
# ============================================
all_results = [] all_results = []
# 1. 合并比例敏感性 # 执行扫描
print(f"\n" + "-" * 40) print("\n开始参数扫描...")
print("1. 合并比例敏感性 (merge_ratio)")
print("-" * 40)
merge_ratios = [1/3, 0.5, 2/3] # 1. Merge Ratio
for mr in merge_ratios: print("Scanning Merge Ratio...")
result = run_pipeline(sites_df, BASE_L_MAX, BASE_MU_SUM_MAX, BASE_CV_MAX, mr) for val in params_config['merge_ratio']:
result['param'] = 'merge_ratio' res = run_pipeline(sites_df, distance_matrix, BASE_L_MAX, BASE_MU_SUM_MAX, BASE_CV_MAX, val)
result['param_value'] = mr res.update({'param': 'merge_ratio', 'value': val})
all_results.append(result) all_results.append(res)
print(f" merge_ratio={mr:.3f}: pairs={result['num_pairs']}, dual={result['num_dual_visits']}, "
f"E1={result['E1']:.0f}, E2={result['E2']:.0f}, F1={result['F1']:.4f}, R1={result['R1']:.4f}")
# 2. 距离阈值敏感性 # 2. Distance Threshold
print(f"\n" + "-" * 40) print("Scanning Distance Threshold...")
print("2. 距离阈值敏感性 (l_max)") for val in params_config['l_max']:
print("-" * 40) res = run_pipeline(sites_df, distance_matrix, val, BASE_MU_SUM_MAX, BASE_CV_MAX, BASE_MERGE_RATIO)
res.update({'param': 'l_max', 'value': val})
all_results.append(res)
l_max_values = [30, 40, 50, 60, 70] # 3. Capacity Cap
for lm in l_max_values: print("Scanning Capacity Cap...")
result = run_pipeline(sites_df, lm, BASE_MU_SUM_MAX, BASE_CV_MAX, BASE_MERGE_RATIO) for val in params_config['mu_sum_max']:
result['param'] = 'l_max' res = run_pipeline(sites_df, distance_matrix, BASE_L_MAX, val, BASE_CV_MAX, BASE_MERGE_RATIO)
result['param_value'] = lm res.update({'param': 'mu_sum_max', 'value': val})
all_results.append(result) all_results.append(res)
print(f" l_max={lm}: pairs={result['num_pairs']}, dual={result['num_dual_visits']}, "
f"E1={result['E1']:.0f}, E2={result['E2']:.0f}, F1={result['F1']:.4f}, R1={result['R1']:.4f}")
# 3. 容量上限敏感性 # 4. CV Threshold
print(f"\n" + "-" * 40) print("Scanning CV Threshold...")
print("3. 容量上限敏感性 (mu_sum_max)") for val in params_config['cv_max']:
print("-" * 40) res = run_pipeline(sites_df, distance_matrix, BASE_L_MAX, BASE_MU_SUM_MAX, val, BASE_MERGE_RATIO)
res.update({'param': 'cv_max', 'value': val})
all_results.append(res)
mu_sum_values = [400, 425, 450, 475, 500] # 转换为DataFrame
for ms in mu_sum_values:
result = run_pipeline(sites_df, BASE_L_MAX, ms, BASE_CV_MAX, BASE_MERGE_RATIO)
result['param'] = 'mu_sum_max'
result['param_value'] = ms
all_results.append(result)
print(f" mu_sum_max={ms}: pairs={result['num_pairs']}, dual={result['num_dual_visits']}, "
f"E1={result['E1']:.0f}, E2={result['E2']:.0f}, F1={result['F1']:.4f}, R1={result['R1']:.4f}")
# 4. CV阈值敏感性
print(f"\n" + "-" * 40)
print("4. CV阈值敏感性 (cv_max)")
print("-" * 40)
cv_max_values = [0.3, 0.4, 0.5, 0.6]
for cv in cv_max_values:
result = run_pipeline(sites_df, BASE_L_MAX, BASE_MU_SUM_MAX, cv, BASE_MERGE_RATIO)
result['param'] = 'cv_max'
result['param_value'] = cv
all_results.append(result)
print(f" cv_max={cv}: pairs={result['num_pairs']}, dual={result['num_dual_visits']}, "
f"E1={result['E1']:.0f}, E2={result['E2']:.0f}, F1={result['F1']:.4f}, R1={result['R1']:.4f}")
# ============================================
# 汇总分析
# ============================================
df_results = pd.DataFrame(all_results) df_results = pd.DataFrame(all_results)
print(f"\n" + "=" * 60) # ============================================
print("敏感性分析汇总") # 绘图
print("=" * 60) # ============================================
print("\n绘制敏感性曲线...")
# 计算各参数的影响范围 fig, axes = plt.subplots(2, 2, figsize=(15, 12))
for param in ['merge_ratio', 'l_max', 'mu_sum_max', 'cv_max']: fig.suptitle('Task 3 Sensitivity Analysis', fontsize=16, fontweight='bold')
subset = df_results[df_results['param'] == param]
print(f"\n{param}:") # 参数对应的中文标签和单位
print(f" E1 变化范围: [{subset['E1'].min():.0f}, {subset['E1'].max():.0f}], " param_labels = {
f"变化幅度: {(subset['E1'].max() - subset['E1'].min()) / base_result['E1'] * 100:.2f}%") 'merge_ratio': ('Merge Ratio', 'Ratio'),
print(f" E2 变化范围: [{subset['E2'].min():.0f}, {subset['E2'].max():.0f}], " 'l_max': ('Distance Threshold (l_max)', 'Miles'),
f"变化幅度: {(subset['E2'].max() - subset['E2'].min()) / base_result['E2'] * 100:.2f}%") 'mu_sum_max': ('Capacity Cap (μ_sum)', 'lbs (proxy)'),
print(f" F1 变化范围: [{subset['F1'].min():.4f}, {subset['F1'].max():.4f}]") 'cv_max': ('CV Threshold', 'CV')
print(f" R1 变化范围: [{subset['R1'].min():.4f}, {subset['R1'].max():.4f}]") }
# 绘图循环
params_list = ['merge_ratio', 'l_max', 'mu_sum_max', 'cv_max']
for i, param in enumerate(params_list):
ax = axes[i // 2, i % 2]
data = df_results[df_results['param'] == param].sort_values('value')
# 绘制 E1/E2 - 左轴
line1, = ax.plot(data['value'], data['E1'], 'b-', linewidth=1.6, label='Expected Service (E1)')
line2, = ax.plot(data['value'], data['E2'], 'g-', linewidth=1.6, alpha=0.9, label='Quality-weighted (E2)')
ax.set_xlabel(param_labels[param][0])
ax.set_ylabel('Service (E1/E2)')
# 绘制 R1 (风险) - 右轴
ax2 = ax.twinx()
line3, = ax2.plot(data['value'], data['R1'], 'r--', linewidth=1.6, label='Shortfall Risk (R1)')
ax2.set_ylabel('Risk Probability (R1)', color='r')
ax2.tick_params(axis='y', labelcolor='r')
# 添加基准线
if param == 'merge_ratio':
base_val = BASE_MERGE_RATIO
elif param == 'l_max':
base_val = BASE_L_MAX
elif param == 'mu_sum_max':
base_val = BASE_MU_SUM_MAX
elif param == 'cv_max':
base_val = BASE_CV_MAX
ax.axvline(x=base_val, color='gray', linestyle=':', alpha=0.5)
# 图例
lines = [line1, line2, line3]
labels = [l.get_label() for l in lines]
ax.legend(lines, labels, loc='best', frameon=True)
ax.set_title(f'Effect of {param_labels[param][0]}')
ax.grid(True, alpha=0.3)
plt.tight_layout(rect=[0, 0.03, 1, 0.95])
plt.savefig(OUTPUT_FIG, dpi=300)
print(f"图表已保存至: {OUTPUT_FIG}")
# ============================================ # ============================================
# 保存结果 # 保存详细数据
# ============================================ # ============================================
OUTPUT_FILE = '07_sensitivity.xlsx' with pd.ExcelWriter(OUTPUT_XLSX, engine='openpyxl') as writer:
df_results.to_excel(writer, sheet_name='detailed_data', index=False)
with pd.ExcelWriter(OUTPUT_FILE, engine='openpyxl') as writer:
# Sheet 1: 所有结果 # 摘要统计
df_results.to_excel(writer, sheet_name='all_results', index=False) summary = []
for param in params_list:
# Sheet 2: 合并比例敏感性 sub = df_results[df_results['param'] == param]
df_merge = df_results[df_results['param'] == 'merge_ratio'].copy() summary.append({
df_merge.to_excel(writer, sheet_name='merge_ratio', index=False) 'Parameter': param,
'E1_Range': f"{sub['E1'].min():.0f} - {sub['E1'].max():.0f}",
# Sheet 3: 距离阈值敏感性 'E1_Var%': (sub['E1'].max() - sub['E1'].min()) / base_result['E1'] * 100,
df_lmax = df_results[df_results['param'] == 'l_max'].copy() 'E2_Range': f"{sub['E2'].min():.0f} - {sub['E2'].max():.0f}",
df_lmax.to_excel(writer, sheet_name='l_max', index=False) 'E2_Var%': (sub['E2'].max() - sub['E2'].min()) / base_result['E2'] * 100,
'R1_Range': f"{sub['R1'].min():.4f} - {sub['R1'].max():.4f}"
# Sheet 4: 容量上限敏感性
df_musum = df_results[df_results['param'] == 'mu_sum_max'].copy()
df_musum.to_excel(writer, sheet_name='mu_sum_max', index=False)
# Sheet 5: CV阈值敏感性
df_cv = df_results[df_results['param'] == 'cv_max'].copy()
df_cv.to_excel(writer, sheet_name='cv_max', index=False)
# Sheet 6: 基准结果
base_df = pd.DataFrame([{
'param': 'baseline',
'l_max': BASE_L_MAX,
'mu_sum_max': BASE_MU_SUM_MAX,
'cv_max': BASE_CV_MAX,
'merge_ratio': BASE_MERGE_RATIO,
**base_result
}])
base_df.to_excel(writer, sheet_name='baseline', index=False)
# Sheet 7: 汇总统计
summary_rows = []
for param in ['merge_ratio', 'l_max', 'mu_sum_max', 'cv_max']:
subset = df_results[df_results['param'] == param]
summary_rows.append({
'param': param,
'E1_min': subset['E1'].min(),
'E1_max': subset['E1'].max(),
'E1_range_pct': (subset['E1'].max() - subset['E1'].min()) / base_result['E1'] * 100,
'E2_min': subset['E2'].min(),
'E2_max': subset['E2'].max(),
'E2_range_pct': (subset['E2'].max() - subset['E2'].min()) / base_result['E2'] * 100,
'F1_min': subset['F1'].min(),
'F1_max': subset['F1'].max(),
'R1_min': subset['R1'].min(),
'R1_max': subset['R1'].max()
}) })
df_summary = pd.DataFrame(summary_rows) pd.DataFrame(summary).to_excel(writer, sheet_name='summary', index=False)
df_summary.to_excel(writer, sheet_name='summary', index=False)
print(f"\n结果已保存至: {OUTPUT_FILE}") print(f"数据已保存至: {OUTPUT_XLSX}")
print(" - Sheet 'all_results': 所有结果") print("完成!")
print(" - Sheet 'merge_ratio': 合并比例敏感性")
print(" - Sheet 'l_max': 距离阈值敏感性")
print(" - Sheet 'mu_sum_max': 容量上限敏感性")
print(" - Sheet 'cv_max': CV阈值敏感性")
print(" - Sheet 'baseline': 基准结果")
print(" - Sheet 'summary': 汇总统计")
print("\n" + "=" * 60)

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 692 KiB