This commit is contained in:
2026-01-19 10:39:37 +08:00
parent eae9dabf0e
commit 03d353a86a
15 changed files with 1263 additions and 76 deletions

432
task1/09_visualize.py Normal file
View File

@@ -0,0 +1,432 @@
"""
Step 09: 可视化
输入: 01_clean.xlsx, 02_demand.xlsx, 03_allocate.xlsx, 04_metrics.xlsx,
05_schedule.xlsx, 08_sensitivity.xlsx
输出: figures/*.png
功能:
1. Fig.1: 站点地图 (需求大小 + 访问频次)
2. Fig.2: 需求修正对比 (修正前后μ)
3. Fig.3: 频次分配分布 (k直方图)
4. Fig.4: 有效性-公平性权衡 (E-F散点图)
5. Fig.5: 日历热力图 (全年排程)
6. Fig.6: 访问间隔箱线图
7. Fig.7: 敏感性分析 (参数-指标折线图)
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')
# Font setup so CJK text renders (macOS); the list is a fallback chain tried in order.
plt.rcParams['font.sans-serif'] = ['Arial Unicode MS', 'SimHei', 'DejaVu Sans']
# Draw the minus sign as an ASCII hyphen rather than the Unicode minus glyph.
plt.rcParams['axes.unicode_minus'] = False
# Path configuration: everything is resolved relative to this script's directory.
BASE_PATH = Path(__file__).parent
FIGURES_PATH = BASE_PATH / "figures"
# Import-time side effect: the output directory is created as soon as the module loads.
FIGURES_PATH.mkdir(exist_ok=True)
# Input workbooks.
# NOTE(review): CLEAN_PATH is not referenced by any function visible in this file.
CLEAN_PATH = BASE_PATH / "01_clean.xlsx"
DEMAND_PATH = BASE_PATH / "02_demand.xlsx"
ALLOCATE_PATH = BASE_PATH / "03_allocate.xlsx"
METRICS_PATH = BASE_PATH / "04_metrics.xlsx"
SCHEDULE_PATH = BASE_PATH / "05_schedule.xlsx"
SENSITIVITY_PATH = BASE_PATH / "08_sensitivity.xlsx"
def fig1_site_map():
    """Fig.1: site map with marker area driven by demand and colour by frequency.

    Reads ALLOCATE_PATH (columns used: lon, lat, mu, k, site_name) and writes
    fig1_site_map.png into FIGURES_PATH.
    """
    print(" 生成 Fig.1: 站点地图...")
    df = pd.read_excel(ALLOCATE_PATH)

    # Single conversion factor from demand (mu) to marker area. The data points
    # and the size legend both use it, so the legend always matches the plot.
    size_per_mu = 0.8

    fig, ax = plt.subplots(figsize=(12, 10))
    # Scatter: area = mu, colour = k.
    scatter = ax.scatter(
        df['lon'], df['lat'],
        s=df['mu'] * size_per_mu,
        c=df['k'],
        cmap='YlOrRd',
        alpha=0.7,
        edgecolors='black',
        linewidths=0.5,
    )

    # Annotate the high-demand sites.
    high_demand = df[df['mu'] > 250]
    for _, row in high_demand.iterrows():
        # str() keeps a missing or non-string site name from crashing the slice.
        ax.annotate(
            f"{str(row['site_name'])[:15]}\nμ={row['mu']:.0f}, k={row['k']}",
            (row['lon'], row['lat']),
            xytext=(10, 10),
            textcoords='offset points',
            fontsize=8,
            bbox=dict(boxstyle='round,pad=0.3', facecolor='yellow', alpha=0.7),
        )

    # Colour bar for k.
    cbar = fig.colorbar(scatter, ax=ax, shrink=0.8)
    cbar.set_label('Visit Frequency (k)', fontsize=12)

    # Size legend. Each swatch is sized from the mu value printed in its label.
    # The previous version scaled pre-scaled sizes a second time, so the swatch
    # labelled "μ=62.5" was actually drawn at the size of μ=50.
    legend_mu = [62.5, 125, 250, 500]
    legend_elements = [
        ax.scatter([], [], s=mu * size_per_mu, c='gray', alpha=0.5,
                   edgecolors='black', label=f'μ={mu:g}')
        for mu in legend_mu
    ]
    ax.legend(handles=legend_elements, title='Demand (μ)', loc='lower left', fontsize=9)

    ax.set_xlabel('Longitude', fontsize=12)
    ax.set_ylabel('Latitude', fontsize=12)
    ax.set_title('Fig.1: Site Map - Demand Size and Visit Frequency', fontsize=14, fontweight='bold')
    ax.grid(True, alpha=0.3)
    fig.tight_layout()
    fig.savefig(FIGURES_PATH / 'fig1_site_map.png', dpi=150, bbox_inches='tight')
    plt.close(fig)
def fig2_demand_correction():
    """Fig.2: grouped bars comparing original and corrected demand per site.

    Reads DEMAND_PATH (columns used: is_corrected, mu, mu_tilde, p_trunc,
    site_name), keeps only the rows flagged as corrected, and writes
    fig2_demand_correction.png into FIGURES_PATH. When no row is flagged the
    figure is skipped instead of failing on an undefined axis range.
    """
    print(" 生成 Fig.2: 需求修正对比...")
    df = pd.read_excel(DEMAND_PATH)

    # Coerce the flag to a real boolean mask. A 0/1 integer column would
    # otherwise be treated as column labels, and NaN would break masking.
    is_corrected = df['is_corrected'].fillna(False).astype(bool)
    corrected = df[is_corrected].sort_values('mu', ascending=False)
    if corrected.empty:
        # Nothing to draw; max() of an empty column is NaN and set_ylim rejects it.
        print(" 跳过 Fig.2: 没有被修正的站点")
        return

    fig, ax = plt.subplots(figsize=(10, 6))
    x = np.arange(len(corrected))
    width = 0.35
    bars1 = ax.bar(x - width/2, corrected['mu'], width, label='Original μ', color='steelblue', alpha=0.8)
    bars2 = ax.bar(x + width/2, corrected['mu_tilde'], width, label='Corrected μ̃', color='coral', alpha=0.8)

    # Value labels above each bar.
    for bar, val in zip(bars1, corrected['mu']):
        ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 5, f'{val:.0f}',
                ha='center', va='bottom', fontsize=9)
    for bar, val in zip(bars2, corrected['mu_tilde']):
        ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 5, f'{val:.0f}',
                ha='center', va='bottom', fontsize=9, color='coral')

    # Truncation probability, centred above each pair of bars.
    per_site = zip(corrected['mu'], corrected['mu_tilde'], corrected['p_trunc'])
    for i, (mu, mu_tilde, p_trunc) in enumerate(per_site):
        ax.text(i, max(mu, mu_tilde) + 25,
                f"p={p_trunc:.2%}",
                ha='center', fontsize=8, style='italic')

    ax.set_xlabel('Site', fontsize=12)
    ax.set_ylabel('Demand per Visit', fontsize=12)
    ax.set_title('Fig.2: Truncation Correction for High-Demand Sites', fontsize=14, fontweight='bold')
    ax.set_xticks(x)
    ax.set_xticklabels([str(name)[:20] for name in corrected['site_name']],
                       rotation=30, ha='right', fontsize=9)
    ax.legend(fontsize=10)

    # Head-room is based on the tallest bar of either series, and never less
    # than what the +25 annotation needs, so labels are not clipped.
    tallest = max(corrected['mu'].max(), corrected['mu_tilde'].max())
    ax.set_ylim(0, max(tallest * 1.2, tallest + 50))
    ax.grid(True, axis='y', alpha=0.3)
    fig.tight_layout()
    fig.savefig(FIGURES_PATH / 'fig2_demand_correction.png', dpi=150, bbox_inches='tight')
    plt.close(fig)
def fig3_k_distribution():
    """Fig.3: two panels describing the visit-frequency allocation.

    Panel (a) is a histogram of k with mean and median markers. Panel (b) is a
    scatter of k against mu_tilde with a first-degree least-squares fit and
    the Pearson correlation. Reads ALLOCATE_PATH (columns used: k, mu_tilde)
    and writes fig3_k_distribution.png into FIGURES_PATH.
    """
    print(" 生成 Fig.3: 频次分配分布...")
    alloc = pd.read_excel(ALLOCATE_PATH)
    freq = alloc['k']
    demand = alloc['mu_tilde']

    fig, (hist_ax, fit_ax) = plt.subplots(1, 2, figsize=(14, 5))

    # Panel (a): unit-width bins whose edges sit at half-integers.
    edges = np.arange(freq.min() - 0.5, freq.max() + 1.5, 1)
    freq_mean = freq.mean()
    freq_median = freq.median()
    hist_ax.hist(freq, bins=edges, color='steelblue', edgecolor='black', alpha=0.7)
    hist_ax.axvline(freq_mean, color='red', linestyle='--', linewidth=2,
                    label=f'Mean = {freq_mean:.1f}')
    hist_ax.axvline(freq_median, color='green', linestyle=':', linewidth=2,
                    label=f'Median = {freq_median:.0f}')
    hist_ax.set_xlabel('Visit Frequency (k)', fontsize=12)
    hist_ax.set_ylabel('Number of Sites', fontsize=12)
    hist_ax.set_title('(a) Distribution of Visit Frequencies', fontsize=12)
    hist_ax.legend(fontsize=10)
    hist_ax.grid(True, alpha=0.3)

    # Panel (b): k against corrected demand (mu_tilde comes from the same sheet).
    fit_ax.scatter(demand, freq, alpha=0.6, s=60, edgecolors='black', linewidths=0.5)
    # Straight-line fit evaluated on 100 evenly spaced abscissae.
    coeffs = np.polyfit(demand, freq, 1)
    grid = np.linspace(demand.min(), demand.max(), 100)
    fit_ax.plot(grid, np.polyval(coeffs, grid), 'r--', linewidth=2,
                label=f'Linear fit: k = {coeffs[0]:.3f}μ̃ + {coeffs[1]:.1f}')
    # Pearson correlation, shown in the top-left corner in axes coordinates.
    pearson_r = np.corrcoef(demand, freq)[0, 1]
    fit_ax.text(0.05, 0.95, f'r = {pearson_r:.4f}', transform=fit_ax.transAxes, fontsize=11,
                verticalalignment='top', bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))
    fit_ax.set_xlabel('Corrected Demand (μ̃)', fontsize=12)
    fit_ax.set_ylabel('Visit Frequency (k)', fontsize=12)
    fit_ax.set_title('(b) k vs μ̃ (Proportionality Check)', fontsize=12)
    fit_ax.legend(fontsize=10)
    fit_ax.grid(True, alpha=0.3)

    fig.suptitle('Fig.3: Visit Frequency Allocation Analysis', fontsize=14, fontweight='bold', y=1.02)
    fig.tight_layout()
    fig.savefig(FIGURES_PATH / 'fig3_k_distribution.png', dpi=150, bbox_inches='tight')
    plt.close(fig)
def fig4_efficiency_fairness():
    """Fig.4: scatter of E2 against the Gini coefficient, one marker per method.

    Reads the 'metrics_summary' sheet of METRICS_PATH (columns used: method,
    E1_total_service, E2_quality_weighted, F1_gini) and writes
    fig4_efficiency_fairness.png into FIGURES_PATH.
    """
    print(" 生成 Fig.4: 有效性-公平性权衡...")
    df = pd.read_excel(METRICS_PATH, sheet_name='metrics_summary')
    fig, ax = plt.subplots(figsize=(10, 8))

    colors = ['red', 'blue', 'green', 'orange']
    markers = ['o', 's', '^', 'D']
    # Styles are picked by row position, not by index label, and cycle when
    # the sheet holds more methods than there are styles. A fifth method or a
    # non-default index therefore cannot raise IndexError.
    for pos, (_, row) in enumerate(df.iterrows()):
        e2 = row['E2_quality_weighted']
        gini = row['F1_gini']
        ax.scatter(e2, gini,
                   s=300, c=colors[pos % len(colors)], marker=markers[pos % len(markers)],
                   label=str(row['method'])[:30],
                   edgecolors='black', linewidths=1.5, zorder=5)
        # Stagger the call-outs of the first two methods so they do not overlap.
        offset = (15, 15) if pos == 0 else (-15, -15) if pos == 1 else (15, -15)
        ax.annotate(f"E1={row['E1_total_service']:.0f}\nE2={e2:.0f}\nGini={gini:.3f}",
                    (e2, gini),
                    xytext=offset, textcoords='offset points',
                    fontsize=9, ha='center',
                    bbox=dict(boxstyle='round,pad=0.3', facecolor='lightyellow', alpha=0.8),
                    arrowprops=dict(arrowstyle='->', connectionstyle='arc3,rad=0'))

    # Trade-off arrow and caption, in data coordinates tuned for the default window.
    ax.annotate('', xy=(135000, 0.05), xytext=(105000, 0.30),
                arrowprops=dict(arrowstyle='<->', color='purple', lw=2))
    ax.text(115000, 0.20, 'Efficiency-Fairness\nTradeoff', fontsize=10, ha='center',
            color='purple', style='italic')

    ax.set_xlabel('E2 (Quality-Weighted Service Volume)', fontsize=12)
    ax.set_ylabel('F1 (Gini Coefficient, lower = fairer)', fontsize=12)
    ax.set_title('Fig.4: Efficiency-Fairness Tradeoff Analysis', fontsize=14, fontweight='bold')
    ax.legend(loc='upper right', fontsize=10)
    ax.grid(True, alpha=0.3)

    # Keep the original window as the default view, but widen it when a point
    # would otherwise fall outside and be clipped. The default bound goes first
    # in min()/max() so a NaN from empty data leaves the default untouched.
    x_pad, y_pad = 2250, 0.02
    e2_all = df['E2_quality_weighted']
    gini_all = df['F1_gini']
    ax.set_xlim(min(95000, e2_all.min() - x_pad), max(140000, e2_all.max() + x_pad))
    ax.set_ylim(min(0, gini_all.min() - y_pad), max(0.40, gini_all.max() + y_pad))

    fig.tight_layout()
    fig.savefig(FIGURES_PATH / 'fig4_efficiency_fairness.png', dpi=150, bbox_inches='tight')
    plt.close(fig)
def fig5_calendar_heatmap():
    """Fig.5: month-by-day heat map of the total demand scheduled on each day.

    Reads the 'calendar' sheet of SCHEDULE_PATH (columns used: day, site_1_id,
    site_2_id) and ALLOCATE_PATH (columns used: site_id, mu). The demand of a
    day is the sum of mu over the sites scheduled on it. Writes
    fig5_calendar_heatmap.png into FIGURES_PATH.

    `day` is treated as a 1-based day-of-year and converted to a real calendar
    month and day, so every cell sits under the correct month label.
    """
    print(" 生成 Fig.5: 日历热力图...")
    df_calendar = pd.read_excel(SCHEDULE_PATH, sheet_name='calendar')
    df_allocate = pd.read_excel(ALLOCATE_PATH)

    # site_id -> mu lookup.
    mu_map = dict(zip(df_allocate['site_id'], df_allocate['mu']))

    def site_mu(site_id):
        # An empty slot or an unknown site contributes no demand.
        return 0 if pd.isna(site_id) else mu_map.get(int(site_id), 0)

    daily_demand = [
        site_mu(first) + site_mu(second)
        for first, second in zip(df_calendar['site_1_id'], df_calendar['site_2_id'])
    ]
    df_calendar['total_demand'] = daily_demand

    # Real month lengths. The previous fixed 31-day blocks drew 1 March as
    # "Feb 29" and pushed every later date further off.
    # NOTE(review): a schedule that reaches day 366 is assumed to be a leap year - confirm.
    february = 29 if df_calendar['day'].max() >= 366 else 28
    month_lengths = [31, february, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
    month_starts = np.concatenate(([0], np.cumsum(month_lengths)))

    # 12 x 31 grid; cells for dates that do not exist (e.g. Feb 30) stay NaN.
    heatmap_data = np.full((12, 31), np.nan)
    for day, demand in zip(df_calendar['day'], daily_demand):
        if pd.isna(day):
            continue
        offset = int(day) - 1
        # Skip days outside the year; a non-positive day would otherwise wrap
        # around through negative indexing and overwrite another cell.
        if not 0 <= offset < month_starts[-1]:
            continue
        month = int(np.searchsorted(month_starts, offset, side='right')) - 1
        heatmap_data[month, offset - month_starts[month]] = demand

    fig, ax = plt.subplots(figsize=(14, 8))
    im = ax.imshow(heatmap_data, cmap='YlOrRd', aspect='auto', interpolation='nearest')
    # Colour bar.
    cbar = fig.colorbar(im, ax=ax, shrink=0.8)
    cbar.set_label('Daily Total Demand (μ₁ + μ₂)', fontsize=11)
    # Axis ticks and labels.
    ax.set_xticks(np.arange(31))
    ax.set_xticklabels(np.arange(1, 32), fontsize=8)
    ax.set_yticks(np.arange(12))
    ax.set_yticklabels(['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                        'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'], fontsize=10)
    ax.set_xlabel('Day of Month', fontsize=12)
    ax.set_ylabel('Month', fontsize=12)
    ax.set_title('Fig.5: Annual Schedule Calendar Heatmap (Daily Demand)', fontsize=14, fontweight='bold')
    fig.tight_layout()
    fig.savefig(FIGURES_PATH / 'fig5_calendar_heatmap.png', dpi=150, bbox_inches='tight')
    plt.close(fig)
def fig6_gap_boxplot():
    """Fig.6: visit-interval analysis in two panels.

    Panel (a) is a box plot of gap_mean per visit-frequency group. Panel (b)
    is a histogram of gap_cv with mean and median markers. Reads the
    'gap_statistics' sheet of SCHEDULE_PATH (columns used: k, gap_mean,
    gap_cv) and writes fig6_gap_boxplot.png into FIGURES_PATH.
    """
    print(" 生成 Fig.6: 访问间隔箱线图...")
    df_gaps = pd.read_excel(SCHEDULE_PATH, sheet_name='gap_statistics')
    # Keep only the rows that have a mean gap.
    df_valid = df_gaps[df_gaps['gap_mean'].notna()].copy()

    # Bucket sites by k. The last bin is open-ended so it really is "21+";
    # with an upper edge of 40, sites with k > 40 were silently dropped.
    group_labels = ['1-5', '6-10', '11-15', '16-20', '21+']
    df_valid['k_group'] = pd.cut(df_valid['k'], bins=[0, 5, 10, 15, 20, np.inf],
                                 labels=group_labels)

    fig, axes = plt.subplots(1, 2, figsize=(14, 6))

    # Panel (a): mean gap per frequency group.
    ax1 = axes[0]
    # Pair each label with its data before filtering, so dropping an empty
    # group can never shift or mismatch the tick labels.
    per_group = [
        (label, df_valid.loc[df_valid['k_group'] == label, 'gap_mean'].tolist())
        for label in group_labels
    ]
    per_group = [(label, values) for label, values in per_group if values]
    if per_group:
        shown_labels = [label for label, _ in per_group]
        shown_values = [values for _, values in per_group]
        bp = ax1.boxplot(shown_values, patch_artist=True)
        # Tick labels are set explicitly because the boxplot `labels=` keyword
        # is deprecated in newer matplotlib releases.
        ax1.set_xticklabels(shown_labels)
        colors = plt.cm.Blues(np.linspace(0.3, 0.8, len(shown_values)))
        for patch, color in zip(bp['boxes'], colors):
            patch.set_facecolor(color)
    ax1.set_xlabel('Visit Frequency Group (k)', fontsize=12)
    ax1.set_ylabel('Mean Gap (days)', fontsize=12)
    ax1.set_title('(a) Mean Visit Interval by Frequency Group', fontsize=12)
    ax1.grid(True, alpha=0.3)

    # Panel (b): distribution of the gap coefficient of variation.
    ax2 = axes[1]
    # NaN values are removed first: hist cannot derive a bin range from them.
    gap_cv = df_valid['gap_cv'].dropna()
    if not gap_cv.empty:
        ax2.hist(gap_cv, bins=20, color='steelblue', edgecolor='black', alpha=0.7)
        ax2.axvline(gap_cv.mean(), color='red', linestyle='--', linewidth=2,
                    label=f'Mean CV = {gap_cv.mean():.3f}')
        ax2.axvline(gap_cv.median(), color='green', linestyle=':', linewidth=2,
                    label=f'Median CV = {gap_cv.median():.3f}')
        ax2.legend(fontsize=10)
    ax2.set_xlabel('Coefficient of Variation (CV) of Gaps', fontsize=12)
    ax2.set_ylabel('Number of Sites', fontsize=12)
    ax2.set_title('(b) Distribution of Gap Regularity (CV)', fontsize=12)
    ax2.grid(True, alpha=0.3)

    fig.suptitle('Fig.6: Visit Interval Analysis', fontsize=14, fontweight='bold', y=1.02)
    fig.tight_layout()
    fig.savefig(FIGURES_PATH / 'fig6_gap_boxplot.png', dpi=150, bbox_inches='tight')
    plt.close(fig)
def fig7_sensitivity():
    """Fig.7: 2x2 grid of sensitivity plots for the model parameters.

    Reads three sheets of SENSITIVITY_PATH: 'sensitivity_C' (columns used: C,
    E1, n_corrected), 'sensitivity_p_thresh' (p_thresh, E1) and
    'sensitivity_c_bar' (c_bar, E2). Writes fig7_sensitivity.png into
    FIGURES_PATH. A baseline reference line is drawn only when the baseline
    parameter value is present in the corresponding sweep.
    """
    print(" 生成 Fig.7: 敏感性分析...")

    def baseline_value(frame, param, baseline, metric):
        # Tolerant match instead of ==, and None instead of an IndexError when
        # the sweep does not contain the baseline setting.
        hits = frame.loc[np.isclose(frame[param].astype(float), baseline), metric]
        return None if hits.empty else hits.iloc[0]

    # Load the sensitivity results.
    df_C = pd.read_excel(SENSITIVITY_PATH, sheet_name='sensitivity_C')
    df_p = pd.read_excel(SENSITIVITY_PATH, sheet_name='sensitivity_p_thresh')
    df_cbar = pd.read_excel(SENSITIVITY_PATH, sheet_name='sensitivity_c_bar')
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))

    # (a) Effect of C on E1.
    ax1 = axes[0, 0]
    ax1.plot(df_C['C'], df_C['E1'], 'o-', color='steelblue', linewidth=2, markersize=8)
    base_e1 = baseline_value(df_C, 'C', 400, 'E1')
    if base_e1 is not None:
        ax1.axhline(base_e1, color='red', linestyle='--', alpha=0.5, label='Baseline (C=400)')
        # The baseline is the only labelled artist on this panel.
        ax1.legend(fontsize=9)
    ax1.set_xlabel('Effective Capacity (C)', fontsize=11)
    ax1.set_ylabel('E1 (Total Service Volume)', fontsize=11)
    ax1.set_title('(a) Effect of C on E1', fontsize=12)
    ax1.grid(True, alpha=0.3)

    # (b) Effect of C on the number of corrected sites.
    ax2 = axes[0, 1]
    ax2.bar(df_C['C'].astype(str), df_C['n_corrected'], color='coral', edgecolor='black', alpha=0.7)
    ax2.set_xlabel('Effective Capacity (C)', fontsize=11)
    ax2.set_ylabel('Number of Corrected Sites', fontsize=11)
    ax2.set_title('(b) Effect of C on Correction Count', fontsize=12)
    ax2.grid(True, axis='y', alpha=0.3)

    # (c) Effect of p_thresh on E1.
    ax3 = axes[1, 0]
    ax3.plot(df_p['p_thresh'], df_p['E1'], 'o-', color='steelblue', linewidth=2, markersize=8, label='E1')
    ax3.set_xlabel('Truncation Threshold (p_thresh)', fontsize=11)
    ax3.set_ylabel('E1 (Total Service Volume)', fontsize=11)
    ax3.set_title('(c) Effect of p_thresh on E1', fontsize=12)
    ax3.legend(fontsize=9)
    ax3.grid(True, alpha=0.3)

    # (d) Effect of c_bar on E2.
    ax4 = axes[1, 1]
    ax4.plot(df_cbar['c_bar'], df_cbar['E2'], 's-', color='green', linewidth=2, markersize=8, label='E2')
    base_e2 = baseline_value(df_cbar, 'c_bar', 250, 'E2')
    if base_e2 is not None:
        ax4.axhline(base_e2, color='red', linestyle='--', alpha=0.5, label='Baseline (c̄=250)')
    ax4.set_xlabel('Quality Threshold (c̄)', fontsize=11)
    ax4.set_ylabel('E2 (Quality-Weighted Service)', fontsize=11)
    ax4.set_title('(d) Effect of c̄ on E2', fontsize=12)
    ax4.legend(fontsize=9)
    ax4.grid(True, alpha=0.3)

    fig.suptitle('Fig.7: Sensitivity Analysis of Model Parameters', fontsize=14, fontweight='bold', y=1.02)
    fig.tight_layout()
    fig.savefig(FIGURES_PATH / 'fig7_sensitivity.png', dpi=150, bbox_inches='tight')
    plt.close(fig)
def main():
    """Run every figure generator in order, then list the PNG files produced."""
    rule = "=" * 60
    print(rule)
    print("Step 09: 可视化")
    print(rule)
    print(f"\n输出目录: {FIGURES_PATH}")

    # Build all figures, Fig.1 through Fig.7.
    print("\n[1] 生成图表...")
    generators = (
        fig1_site_map,
        fig2_demand_correction,
        fig3_k_distribution,
        fig4_efficiency_fairness,
        fig5_calendar_heatmap,
        fig6_gap_boxplot,
        fig7_sensitivity,
    )
    for generate in generators:
        generate()

    # Report every PNG currently present in the output directory.
    print("\n[2] 已生成图表:")
    for png in sorted(FIGURES_PATH.glob('*.png')):
        print(f" {png.name}")

    print("\n" + rule)
    print("Step 09 完成")
    print(rule)


if __name__ == "__main__":
    main()