P1: phos

2026-01-19 10:39:37 +08:00
parent eae9dabf0e
commit 03d353a86a
15 changed files with 1263 additions and 76 deletions
--- a/task1/09_visualize.py
+++ b/task1/09_visualize.py
@@ -0,0 +1,432 @@
+"""
+Step 09: 可视化
+
+输入: 01_clean.xlsx, 02_demand.xlsx, 03_allocate.xlsx, 04_metrics.xlsx,
+      05_schedule.xlsx, 08_sensitivity.xlsx
+输出: figures/*.png
+
+功能:
+1. Fig.1: 站点地图 (需求大小 + 访问频次)
+2. Fig.2: 需求修正对比 (修正前后μ)
+3. Fig.3: 频次分配分布 (k直方图)
+4. Fig.4: 有效性-公平性权衡 (E-F散点图)
+5. Fig.5: 日历热力图 (全年排程)
+6. Fig.6: 访问间隔箱线图
+7. Fig.7: 敏感性分析 (参数-指标折线图)
+"""
+
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import matplotlib.patches as mpatches
+from pathlib import Path
+import warnings
+# Suppress all warnings process-wide (no category or module filter is given).
+warnings.filterwarnings('ignore')
+
+# Font setup for CJK glyphs (the original note targets macOS). The listed
+# sans-serif families are candidates in order of preference.
+plt.rcParams['font.sans-serif'] = ['Arial Unicode MS', 'SimHei', 'DejaVu Sans']
+# Render the minus sign as an ASCII hyphen instead of U+2212.
+plt.rcParams['axes.unicode_minus'] = False
+
+# Path configuration: everything is resolved relative to this script's folder.
+BASE_PATH = Path(__file__).parent
+FIGURES_PATH = BASE_PATH / "figures"
+# NOTE: runs at import time; creates the output folder if it does not exist.
+FIGURES_PATH.mkdir(exist_ok=True)
+
+# Input workbooks produced by the earlier pipeline steps.
+# NOTE(review): CLEAN_PATH is not referenced by any figure function in this file.
+CLEAN_PATH = BASE_PATH / "01_clean.xlsx"
+DEMAND_PATH = BASE_PATH / "02_demand.xlsx"
+ALLOCATE_PATH = BASE_PATH / "03_allocate.xlsx"
+METRICS_PATH = BASE_PATH / "04_metrics.xlsx"
+SCHEDULE_PATH = BASE_PATH / "05_schedule.xlsx"
+SENSITIVITY_PATH = BASE_PATH / "08_sensitivity.xlsx"
+
+
+def fig1_site_map():
+    """Fig.1: 站点地图"""
+    print("  生成 Fig.1: 站点地图...")
+
+    df = pd.read_excel(ALLOCATE_PATH)
+
+    fig, ax = plt.subplots(figsize=(12, 10))
+
+    # 散点图: 大小=μ, 颜色=k
+    scatter = ax.scatter(
+        df['lon'], df['lat'],
+        s=df['mu'] * 0.8,  # 点大小与需求成正比
+        c=df['k'],
+        cmap='YlOrRd',
+        alpha=0.7,
+        edgecolors='black',
+        linewidths=0.5
+    )
+
+    # 标注高需求站点
+    high_demand = df[df['mu'] > 250]
+    for _, row in high_demand.iterrows():
+        ax.annotate(
+            f"{row['site_name'][:15]}\nμ={row['mu']:.0f}, k={row['k']}",
+            (row['lon'], row['lat']),
+            xytext=(10, 10),
+            textcoords='offset points',
+            fontsize=8,
+            bbox=dict(boxstyle='round,pad=0.3', facecolor='yellow', alpha=0.7)
+        )
+
+    # 颜色条
+    cbar = plt.colorbar(scatter, ax=ax, shrink=0.8)
+    cbar.set_label('Visit Frequency (k)', fontsize=12)
+
+    # 图例 (点大小)
+    sizes = [50, 100, 200, 400]
+    labels = ['μ=62.5', 'μ=125', 'μ=250', 'μ=500']
+    legend_elements = [
+        plt.scatter([], [], s=s * 0.8, c='gray', alpha=0.5, edgecolors='black', label=l)
+        for s, l in zip(sizes, labels)
+    ]
+    ax.legend(handles=legend_elements, title='Demand (μ)', loc='lower left', fontsize=9)
+
+    ax.set_xlabel('Longitude', fontsize=12)
+    ax.set_ylabel('Latitude', fontsize=12)
+    ax.set_title('Fig.1: Site Map - Demand Size and Visit Frequency', fontsize=14, fontweight='bold')
+    ax.grid(True, alpha=0.3)
+
+    plt.tight_layout()
+    plt.savefig(FIGURES_PATH / 'fig1_site_map.png', dpi=150, bbox_inches='tight')
+    plt.close()
+
+
+def fig2_demand_correction():
+    """Fig.2: 需求修正对比"""
+    print("  生成 Fig.2: 需求修正对比...")
+
+    df = pd.read_excel(DEMAND_PATH)
+
+    # 只显示被修正的站点
+    corrected = df[df['is_corrected']].copy()
+    corrected = corrected.sort_values('mu', ascending=False)
+
+    fig, ax = plt.subplots(figsize=(10, 6))
+
+    x = np.arange(len(corrected))
+    width = 0.35
+
+    bars1 = ax.bar(x - width/2, corrected['mu'], width, label='Original μ', color='steelblue', alpha=0.8)
+    bars2 = ax.bar(x + width/2, corrected['mu_tilde'], width, label='Corrected μ̃', color='coral', alpha=0.8)
+
+    # 添加数值标签
+    for bar, val in zip(bars1, corrected['mu']):
+        ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 5, f'{val:.0f}',
+                ha='center', va='bottom', fontsize=9)
+    for bar, val in zip(bars2, corrected['mu_tilde']):
+        ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 5, f'{val:.0f}',
+                ha='center', va='bottom', fontsize=9, color='coral')
+
+    # 添加p_trunc标注
+    for i, (_, row) in enumerate(corrected.iterrows()):
+        ax.text(i, max(row['mu'], row['mu_tilde']) + 25,
+                f"p={row['p_trunc']:.2%}",
+                ha='center', fontsize=8, style='italic')
+
+    ax.set_xlabel('Site', fontsize=12)
+    ax.set_ylabel('Demand per Visit', fontsize=12)
+    ax.set_title('Fig.2: Truncation Correction for High-Demand Sites', fontsize=14, fontweight='bold')
+    ax.set_xticks(x)
+    ax.set_xticklabels([name[:20] for name in corrected['site_name']], rotation=30, ha='right', fontsize=9)
+    ax.legend(fontsize=10)
+    ax.set_ylim(0, corrected['mu_tilde'].max() * 1.2)
+    ax.grid(True, axis='y', alpha=0.3)
+
+    plt.tight_layout()
+    plt.savefig(FIGURES_PATH / 'fig2_demand_correction.png', dpi=150, bbox_inches='tight')
+    plt.close()
+
+
+def fig3_k_distribution():
+    """Fig.3: 频次分配分布"""
+    print("  生成 Fig.3: 频次分配分布...")
+
+    df = pd.read_excel(ALLOCATE_PATH)
+
+    fig, axes = plt.subplots(1, 2, figsize=(14, 5))
+
+    # 左图: k的直方图
+    ax1 = axes[0]
+    bins = np.arange(df['k'].min() - 0.5, df['k'].max() + 1.5, 1)
+    ax1.hist(df['k'], bins=bins, color='steelblue', edgecolor='black', alpha=0.7)
+    ax1.axvline(df['k'].mean(), color='red', linestyle='--', linewidth=2, label=f'Mean = {df["k"].mean():.1f}')
+    ax1.axvline(df['k'].median(), color='green', linestyle=':', linewidth=2, label=f'Median = {df["k"].median():.0f}')
+    ax1.set_xlabel('Visit Frequency (k)', fontsize=12)
+    ax1.set_ylabel('Number of Sites', fontsize=12)
+    ax1.set_title('(a) Distribution of Visit Frequencies', fontsize=12)
+    ax1.legend(fontsize=10)
+    ax1.grid(True, alpha=0.3)
+
+    # 右图: k与μ̃的关系
+    ax2 = axes[1]
+    # mu_tilde already in allocate file
+    ax2.scatter(df['mu_tilde'], df['k'], alpha=0.6, s=60, edgecolors='black', linewidths=0.5)
+
+    # 拟合线
+    z = np.polyfit(df['mu_tilde'], df['k'], 1)
+    p = np.poly1d(z)
+    x_fit = np.linspace(df['mu_tilde'].min(), df['mu_tilde'].max(), 100)
+    ax2.plot(x_fit, p(x_fit), 'r--', linewidth=2, label=f'Linear fit: k = {z[0]:.3f}μ̃ + {z[1]:.1f}')
+
+    # 相关系数
+    corr = np.corrcoef(df['mu_tilde'], df['k'])[0, 1]
+    ax2.text(0.05, 0.95, f'r = {corr:.4f}', transform=ax2.transAxes, fontsize=11,
+             verticalalignment='top', bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))
+
+    ax2.set_xlabel('Corrected Demand (μ̃)', fontsize=12)
+    ax2.set_ylabel('Visit Frequency (k)', fontsize=12)
+    ax2.set_title('(b) k vs μ̃ (Proportionality Check)', fontsize=12)
+    ax2.legend(fontsize=10)
+    ax2.grid(True, alpha=0.3)
+
+    plt.suptitle('Fig.3: Visit Frequency Allocation Analysis', fontsize=14, fontweight='bold', y=1.02)
+    plt.tight_layout()
+    plt.savefig(FIGURES_PATH / 'fig3_k_distribution.png', dpi=150, bbox_inches='tight')
+    plt.close()
+
+
+def fig4_efficiency_fairness():
+    """Fig.4: 有效性-公平性权衡"""
+    print("  生成 Fig.4: 有效性-公平性权衡...")
+
+    df = pd.read_excel(METRICS_PATH, sheet_name='metrics_summary')
+
+    fig, ax = plt.subplots(figsize=(10, 8))
+
+    # 绘制所有方案
+    colors = ['red', 'blue', 'green', 'orange']
+    markers = ['o', 's', '^', 'D']
+
+    for i, row in df.iterrows():
+        ax.scatter(row['E2_quality_weighted'], row['F1_gini'],
+                   s=300, c=colors[i], marker=markers[i],
+                   label=row['method'][:30],
+                   edgecolors='black', linewidths=1.5, zorder=5)
+
+        # 标注
+        offset = (15, 15) if i == 0 else (-15, -15) if i == 1 else (15, -15)
+        ax.annotate(f"E1={row['E1_total_service']:.0f}\nE2={row['E2_quality_weighted']:.0f}\nGini={row['F1_gini']:.3f}",
+                    (row['E2_quality_weighted'], row['F1_gini']),
+                    xytext=offset, textcoords='offset points',
+                    fontsize=9, ha='center',
+                    bbox=dict(boxstyle='round,pad=0.3', facecolor='lightyellow', alpha=0.8),
+                    arrowprops=dict(arrowstyle='->', connectionstyle='arc3,rad=0'))
+
+    # 添加权衡箭头
+    ax.annotate('', xy=(135000, 0.05), xytext=(105000, 0.30),
+                arrowprops=dict(arrowstyle='<->', color='purple', lw=2))
+    ax.text(115000, 0.20, 'Efficiency-Fairness\nTradeoff', fontsize=10, ha='center',
+            color='purple', style='italic')
+
+    ax.set_xlabel('E2 (Quality-Weighted Service Volume)', fontsize=12)
+    ax.set_ylabel('F1 (Gini Coefficient, lower = fairer)', fontsize=12)
+    ax.set_title('Fig.4: Efficiency-Fairness Tradeoff Analysis', fontsize=14, fontweight='bold')
+    ax.legend(loc='upper right', fontsize=10)
+    ax.grid(True, alpha=0.3)
+
+    # 设置轴范围
+    ax.set_xlim(95000, 140000)
+    ax.set_ylim(0, 0.40)
+
+    plt.tight_layout()
+    plt.savefig(FIGURES_PATH / 'fig4_efficiency_fairness.png', dpi=150, bbox_inches='tight')
+    plt.close()
+
+
+def fig5_calendar_heatmap():
+    """Fig.5: 日历热力图"""
+    print("  生成 Fig.5: 日历热力图...")
+
+    df_calendar = pd.read_excel(SCHEDULE_PATH, sheet_name='calendar')
+    df_allocate = pd.read_excel(ALLOCATE_PATH)
+
+    # 创建站点μ映射
+    mu_map = dict(zip(df_allocate['site_id'], df_allocate['mu']))
+
+    # 计算每天的总需求
+    daily_demand = []
+    for _, row in df_calendar.iterrows():
+        demand = 0
+        if pd.notna(row['site_1_id']):
+            demand += mu_map.get(int(row['site_1_id']), 0)
+        if pd.notna(row['site_2_id']):
+            demand += mu_map.get(int(row['site_2_id']), 0)
+        daily_demand.append(demand)
+
+    df_calendar['total_demand'] = daily_demand
+
+    # 创建12x31的热力图矩阵
+    heatmap_data = np.full((12, 31), np.nan)
+
+    for _, row in df_calendar.iterrows():
+        day = row['day']
+        # 简单映射: 假设每月30/31天
+        month = (day - 1) // 31
+        day_of_month = (day - 1) % 31
+        if month < 12:
+            heatmap_data[month, day_of_month] = row['total_demand']
+
+    fig, ax = plt.subplots(figsize=(14, 8))
+
+    im = ax.imshow(heatmap_data, cmap='YlOrRd', aspect='auto', interpolation='nearest')
+
+    # 颜色条
+    cbar = plt.colorbar(im, ax=ax, shrink=0.8)
+    cbar.set_label('Daily Total Demand (μ₁ + μ₂)', fontsize=11)
+
+    # 轴标签
+    ax.set_xticks(np.arange(31))
+    ax.set_xticklabels(np.arange(1, 32), fontsize=8)
+    ax.set_yticks(np.arange(12))
+    ax.set_yticklabels(['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+                        'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'], fontsize=10)
+
+    ax.set_xlabel('Day of Month', fontsize=12)
+    ax.set_ylabel('Month', fontsize=12)
+    ax.set_title('Fig.5: Annual Schedule Calendar Heatmap (Daily Demand)', fontsize=14, fontweight='bold')
+
+    plt.tight_layout()
+    plt.savefig(FIGURES_PATH / 'fig5_calendar_heatmap.png', dpi=150, bbox_inches='tight')
+    plt.close()
+
+
+def fig6_gap_boxplot():
+    """Fig.6: 访问间隔箱线图"""
+    print("  生成 Fig.6: 访问间隔箱线图...")
+
+    df_gaps = pd.read_excel(SCHEDULE_PATH, sheet_name='gap_statistics')
+
+    # 过滤有效数据
+    df_valid = df_gaps[df_gaps['gap_mean'].notna()].copy()
+
+    # 按k分组
+    df_valid['k_group'] = pd.cut(df_valid['k'], bins=[0, 5, 10, 15, 20, 40],
+                                  labels=['1-5', '6-10', '11-15', '16-20', '21+'])
+
+    fig, axes = plt.subplots(1, 2, figsize=(14, 6))
+
+    # 左图: 间隔均值按k分组的箱线图
+    ax1 = axes[0]
+    groups = df_valid.groupby('k_group')['gap_mean'].apply(list).values
+    group_labels = ['1-5', '6-10', '11-15', '16-20', '21+']
+
+    bp = ax1.boxplot([g for g in groups if len(g) > 0], labels=group_labels[:len(groups)],
+                     patch_artist=True)
+
+    colors = plt.cm.Blues(np.linspace(0.3, 0.8, len(groups)))
+    for patch, color in zip(bp['boxes'], colors):
+        patch.set_facecolor(color)
+
+    ax1.set_xlabel('Visit Frequency Group (k)', fontsize=12)
+    ax1.set_ylabel('Mean Gap (days)', fontsize=12)
+    ax1.set_title('(a) Mean Visit Interval by Frequency Group', fontsize=12)
+    ax1.grid(True, alpha=0.3)
+
+    # 右图: 间隔CV的分布
+    ax2 = axes[1]
+    ax2.hist(df_valid['gap_cv'], bins=20, color='steelblue', edgecolor='black', alpha=0.7)
+    ax2.axvline(df_valid['gap_cv'].mean(), color='red', linestyle='--', linewidth=2,
+                label=f'Mean CV = {df_valid["gap_cv"].mean():.3f}')
+    ax2.axvline(df_valid['gap_cv'].median(), color='green', linestyle=':', linewidth=2,
+                label=f'Median CV = {df_valid["gap_cv"].median():.3f}')
+
+    ax2.set_xlabel('Coefficient of Variation (CV) of Gaps', fontsize=12)
+    ax2.set_ylabel('Number of Sites', fontsize=12)
+    ax2.set_title('(b) Distribution of Gap Regularity (CV)', fontsize=12)
+    ax2.legend(fontsize=10)
+    ax2.grid(True, alpha=0.3)
+
+    plt.suptitle('Fig.6: Visit Interval Analysis', fontsize=14, fontweight='bold', y=1.02)
+    plt.tight_layout()
+    plt.savefig(FIGURES_PATH / 'fig6_gap_boxplot.png', dpi=150, bbox_inches='tight')
+    plt.close()
+
+
+def fig7_sensitivity():
+    """Fig.7: 敏感性分析"""
+    print("  生成 Fig.7: 敏感性分析...")
+
+    # 读取敏感性分析结果
+    df_C = pd.read_excel(SENSITIVITY_PATH, sheet_name='sensitivity_C')
+    df_p = pd.read_excel(SENSITIVITY_PATH, sheet_name='sensitivity_p_thresh')
+    df_cbar = pd.read_excel(SENSITIVITY_PATH, sheet_name='sensitivity_c_bar')
+
+    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
+
+    # (a) C对E1的影响
+    ax1 = axes[0, 0]
+    ax1.plot(df_C['C'], df_C['E1'], 'o-', color='steelblue', linewidth=2, markersize=8)
+    ax1.axhline(df_C[df_C['C'] == 400]['E1'].values[0], color='red', linestyle='--', alpha=0.5, label='Baseline (C=400)')
+    ax1.set_xlabel('Effective Capacity (C)', fontsize=11)
+    ax1.set_ylabel('E1 (Total Service Volume)', fontsize=11)
+    ax1.set_title('(a) Effect of C on E1', fontsize=12)
+    ax1.legend(fontsize=9)
+    ax1.grid(True, alpha=0.3)
+
+    # (b) C对修正站点数的影响
+    ax2 = axes[0, 1]
+    ax2.bar(df_C['C'].astype(str), df_C['n_corrected'], color='coral', edgecolor='black', alpha=0.7)
+    ax2.set_xlabel('Effective Capacity (C)', fontsize=11)
+    ax2.set_ylabel('Number of Corrected Sites', fontsize=11)
+    ax2.set_title('(b) Effect of C on Correction Count', fontsize=12)
+    ax2.grid(True, axis='y', alpha=0.3)
+
+    # (c) p_thresh对指标的影响
+    ax3 = axes[1, 0]
+    ax3.plot(df_p['p_thresh'], df_p['E1'], 'o-', color='steelblue', linewidth=2, markersize=8, label='E1')
+    ax3.set_xlabel('Truncation Threshold (p_thresh)', fontsize=11)
+    ax3.set_ylabel('E1 (Total Service Volume)', fontsize=11)
+    ax3.set_title('(c) Effect of p_thresh on E1', fontsize=12)
+    ax3.legend(fontsize=9)
+    ax3.grid(True, alpha=0.3)
+
+    # (d) c_bar对E2的影响
+    ax4 = axes[1, 1]
+    ax4.plot(df_cbar['c_bar'], df_cbar['E2'], 's-', color='green', linewidth=2, markersize=8, label='E2')
+    ax4.axhline(df_cbar[df_cbar['c_bar'] == 250]['E2'].values[0], color='red', linestyle='--', alpha=0.5, label='Baseline (c̄=250)')
+    ax4.set_xlabel('Quality Threshold (c̄)', fontsize=11)
+    ax4.set_ylabel('E2 (Quality-Weighted Service)', fontsize=11)
+    ax4.set_title('(d) Effect of c̄ on E2', fontsize=12)
+    ax4.legend(fontsize=9)
+    ax4.grid(True, alpha=0.3)
+
+    plt.suptitle('Fig.7: Sensitivity Analysis of Model Parameters', fontsize=14, fontweight='bold', y=1.02)
+    plt.tight_layout()
+    plt.savefig(FIGURES_PATH / 'fig7_sensitivity.png', dpi=150, bbox_inches='tight')
+    plt.close()
+
+
+def main():
+    print("=" * 60)
+    print("Step 09: 可视化")
+    print("=" * 60)
+
+    print(f"\n输出目录: {FIGURES_PATH}")
+
+    # 生成所有图表
+    print("\n[1] 生成图表...")
+
+    fig1_site_map()
+    fig2_demand_correction()
+    fig3_k_distribution()
+    fig4_efficiency_fairness()
+    fig5_calendar_heatmap()
+    fig6_gap_boxplot()
+    fig7_sensitivity()
+
+    # 列出生成的文件
+    print(f"\n[2] 已生成图表:")
+    for f in sorted(FIGURES_PATH.glob('*.png')):
+        print(f"    {f.name}")
+
+    print("\n" + "=" * 60)
+    print("Step 09 完成")
+    print("=" * 60)
+
+
+if __name__ == "__main__":
+    main()