Files
mcm-mfp/task1/09_visualize.py
2026-01-19 19:43:57 +08:00

591 lines
21 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
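Step 09: Visualization
Inputs: 01_clean.xlsx, 02_demand.xlsx, 03_allocate.xlsx, 04_metrics.xlsx,
05_schedule.xlsx, 08_sensitivity.xlsx
Outputs: figures/*.png
Figures produced:
1. Fig.1: site map (demand size + visit frequency)
2. Fig.2: demand correction comparison (μ before and after correction)
3. Fig.3: frequency allocation distribution (histogram of k)
4. Fig.4: effectiveness-fairness trade-off (E-F scatter plot)
5. Fig.5: calendar heat map (full-year schedule)
6. Fig.6: visit-interval box plot
7. Fig.7: sensitivity analysis (parameter vs. metric line charts)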
"""
from __future__ import annotations
import os
import pandas as pd
import numpy as np
from pathlib import Path
import warnings
# Silence every warning category for the whole process.
warnings.filterwarnings('ignore')
# Give matplotlib a config/cache directory next to this script, so that
# matplotlib/fontconfig do not try to build their cache in a non-writable
# directory (which causes warnings and slow start-up). `setdefault` keeps any
# value the caller already exported. This is done before pyplot is imported.
os.environ.setdefault("MPLCONFIGDIR", str((Path(__file__).parent / ".mpl_cache").resolve()))
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
import json
# Sans-serif font fallback chain with CJK-capable fonts first (macOS set-up)
plt.rcParams['font.sans-serif'] = ['Arial Unicode MS', 'SimHei', 'DejaVu Sans']
# Draw minus signs with the ASCII hyphen instead of the Unicode minus glyph
plt.rcParams['axes.unicode_minus'] = False
# Paper-style colour theme (modelled on tu.png): soft blue/green/purple/orange
# tones, a light grid colour and a dark text colour. Keys are looked up by the
# colormap builders, the theme/axes helpers and every figure function below.
TU = {
"blue_light": "#a0b0d8",
"blue_mid": "#7880b0",
"blue_dark": "#384870",
"teal": "#487890",
"green": "#88b0a0",
"olive": "#808860",
"mauve": "#a080a0",
"taupe": "#b09890",
"orange": "#d0a080",
"gray": "#a0a0a0",
"grid": "#e8e8e8",
"text": "#2b2b2b",
}
def _cmap_k() -> LinearSegmentedColormap:
    """Return the ``tu_k`` colormap running from light to dark theme blue."""
    stops = [TU[shade] for shade in ("blue_light", "blue_mid", "blue_dark")]
    return LinearSegmentedColormap.from_list("tu_k", stops)
def _cmap_heat() -> LinearSegmentedColormap:
    """Return the ``tu_heat`` colormap: near-white, green, teal, dark blue."""
    # The lowest stop is a fixed light grey; the rest come from the theme.
    stops = ["#f3f4f6"]
    stops.extend(TU[shade] for shade in ("green", "teal", "blue_dark"))
    return LinearSegmentedColormap.from_list("tu_heat", stops)
def apply_tu_theme() -> None:
    """Install the TU paper theme into matplotlib's global ``rcParams``.

    Sets white backgrounds, grey axis edges, dark text, bold dark-blue titles,
    an always-on light grid, hidden top/right spines and a rounded,
    semi-opaque legend frame. Affects every figure created afterwards.
    """
    text_color = TU["text"]
    frame_color = TU["gray"]

    backgrounds = {
        "figure.facecolor": "white",
        "axes.facecolor": "white",
    }
    axes_and_ticks = {
        "axes.edgecolor": frame_color,
        "axes.labelcolor": text_color,
        "xtick.color": text_color,
        "ytick.color": text_color,
        "axes.titlecolor": TU["blue_dark"],
        "axes.titleweight": "bold",
        "axes.spines.top": False,
        "axes.spines.right": False,
    }
    grid = {
        "axes.grid": True,
        "grid.color": TU["grid"],
        "grid.linewidth": 0.8,
        "grid.alpha": 1.0,
    }
    legend = {
        "legend.frameon": True,
        "legend.fancybox": True,
        "legend.framealpha": 0.92,
        "legend.edgecolor": frame_color,
        "legend.facecolor": "#f8f8f8",
    }

    plt.rcParams.update({**backgrounds, **axes_and_ticks, **grid, **legend})
def style_axes(ax, *, grid_axis: str = "both") -> None:
    """Apply the theme's grid, tick and spine styling to a single axes.

    Args:
        ax: The matplotlib axes to style.
        grid_axis: Which grid lines to draw; passed through as the ``axis``
            argument of ``ax.grid`` (``"both"``, ``"x"`` or ``"y"``).
    """
    ax.grid(True, axis=grid_axis, linestyle="-", alpha=1.0)
    ax.tick_params(width=0.8)

    # Only the left and bottom spines are restyled here.
    spine_color = TU["gray"]
    for spine in (ax.spines["left"], ax.spines["bottom"]):
        spine.set_color(spine_color)
        spine.set_linewidth(0.9)
# Path configuration: everything is resolved relative to this script's folder.
BASE_PATH = Path(__file__).parent
FIGURES_PATH = BASE_PATH / "figures"
FIGURES_PATH.mkdir(exist_ok=True)  # output folder is created at import time
# Input workbooks
CLEAN_PATH = BASE_PATH / "01_clean.xlsx"
DEMAND_PATH = BASE_PATH / "02_demand.xlsx"
ALLOCATE_PATH = BASE_PATH / "03_allocate.xlsx"
METRICS_PATH = BASE_PATH / "04_metrics.xlsx"
SCHEDULE_PATH = BASE_PATH / "05_schedule.xlsx"
SENSITIVITY_PATH = BASE_PATH / "08_sensitivity.xlsx"
def export_fig1_points_js() -> Path:
    """Write ``fig1_points.js``, the data file read by ``task1/fig1_carto.html``.

    Every row of ``task1/03_allocate.xlsx`` becomes one JSON object with the
    keys ``site_id``, ``site_name``, ``lat``, ``lng`` (taken from the ``lon``
    column), ``mu``, ``k`` and ``visits_2019``. The list is assigned to
    ``window.FIG1_POINTS`` in the generated script.

    Returns:
        Path of the written JavaScript file (inside ``BASE_PATH``).
    """
    table = pd.read_excel(ALLOCATE_PATH).copy()
    for column in ("site_id", "k"):
        table[column] = table[column].astype(int)

    # Cast each field explicitly so json.dumps only sees plain Python scalars.
    records = [
        {
            "site_id": int(row["site_id"]),
            "site_name": str(row["site_name"]),
            "lat": float(row["lat"]),
            "lng": float(row["lon"]),
            "mu": float(row["mu"]),
            "k": int(row["k"]),
            "visits_2019": int(row["visits_2019"]),
        }
        for _, row in table.iterrows()
    ]

    serialized = json.dumps(records, ensure_ascii=False, separators=(',', ':'))
    script_lines = [
        "// Auto-generated from `task1/03_allocate.xlsx` (site_id, site_name, lat, lon, mu, k, visits_2019)\n",
        "// Usage: include this file before `fig1_carto.html` rendering script.\n",
        f"window.FIG1_POINTS = {serialized};\n",
    ]

    target = BASE_PATH / "fig1_points.js"
    target.write_text("".join(script_lines), encoding="utf-8")
    return target
def fig1_site_map():
    """Fig.1: scatter map of the sites (marker size = demand, colour = k).

    Reads ``ALLOCATE_PATH`` and draws every site at (``lon``, ``lat``) with a
    marker area of ``0.8 * mu`` coloured by ``k``. The image is saved as
    ``fig1_site_map.png`` in ``FIGURES_PATH``.
    """
    print(" 生成 Fig.1: 站点地图...")
    sites = pd.read_excel(ALLOCATE_PATH)
    fig, ax = plt.subplots(figsize=(12, 10))

    # Geographic aspect ratio: scale the y axis relative to the x axis by
    # 1 / cos(mean latitude) to correct the longitude/latitude proportions.
    mean_latitude = sites['lat'].mean()
    aspect_ratio = 1 / np.cos(np.radians(mean_latitude))
    ax.set_aspect(aspect_ratio, adjustable='box')

    # Scatter: marker size encodes mu, marker colour encodes k.
    marker_style = dict(alpha=0.85, edgecolors='white', linewidths=0.7)
    points = ax.scatter(
        sites['lon'],
        sites['lat'],
        s=sites['mu'] * 0.8,
        c=sites['k'],
        cmap=_cmap_k(),
        **marker_style,
    )
    # NOTE(review): the previous revision had a placeholder comment here saying
    # the code that labels high-demand sites was "unchanged"; no such code is
    # present in this function.

    # Colour bar, shrunk slightly so it does not squeeze the map.
    colorbar = plt.colorbar(points, ax=ax, shrink=0.7)
    colorbar.set_label('Visit Frequency (k)', fontsize=12, color=TU["text"])
    # NOTE(review): a second placeholder referred to "unchanged" legend code;
    # only the title and axis labels below actually exist.

    ax.set_title('Fig.1: Site Map (Demand μ & Visit Frequency k)', fontsize=14, fontweight='bold')
    ax.set_xlabel('Longitude', fontsize=12)
    ax.set_ylabel('Latitude', fontsize=12)
    style_axes(ax)

    plt.tight_layout()
    output_file = FIGURES_PATH / 'fig1_site_map.png'
    plt.savefig(output_file, dpi=150, bbox_inches='tight')
    plt.close()
def fig2_demand_correction():
    """Fig.2: grouped bars of original vs. truncation-corrected demand.

    Reads ``DEMAND_PATH``, keeps the rows whose ``is_corrected`` flag is set,
    sorts them by ``mu`` (descending) and draws ``mu`` next to ``mu_tilde``
    for each site, with the value above every bar and the site's ``p_trunc``
    above each pair. The image is saved as ``fig2_demand_correction.png`` in
    ``FIGURES_PATH``.

    If no site is flagged as corrected there is nothing to compare, so the
    figure is skipped with a console message (previously the NaN y-limit made
    matplotlib raise).
    """
    print(" 生成 Fig.2: 需求修正对比...")
    df = pd.read_excel(DEMAND_PATH)

    # Only the sites whose demand was actually corrected are shown.
    corrected = df[df['is_corrected']].sort_values('mu', ascending=False)
    if corrected.empty:
        print(" 跳过 Fig.2: 没有被修正的站点")
        return

    fig, ax = plt.subplots(figsize=(10, 6))
    x = np.arange(len(corrected))
    width = 0.35
    bar_style = dict(alpha=0.85, edgecolor="white", linewidth=0.6)
    bars1 = ax.bar(x - width / 2, corrected['mu'], width, label='Original μ', color=TU["teal"], **bar_style)
    bars2 = ax.bar(x + width / 2, corrected['mu_tilde'], width, label='Corrected μ̃', color=TU["green"], **bar_style)

    # Value labels on top of each bar.
    for bar, val in zip(bars1, corrected['mu']):
        ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 5, f'{val:.0f}',
                ha='center', va='bottom', fontsize=9)
    for bar, val in zip(bars2, corrected['mu_tilde']):
        ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 5, f'{val:.0f}',
                ha='center', va='bottom', fontsize=9, color=TU["green"])

    # p_trunc annotation centred above each pair of bars.
    for pos, mu, mu_tilde, p_trunc in zip(x, corrected['mu'], corrected['mu_tilde'], corrected['p_trunc']):
        ax.text(pos, max(mu, mu_tilde) + 25,
                f"p={p_trunc:.2%}",
                ha='center', fontsize=8, style='italic')

    ax.set_xlabel('Site', fontsize=12)
    ax.set_ylabel('Demand per Visit', fontsize=12)
    ax.set_title('Fig.2: Truncation Correction for High-Demand Sites', fontsize=14, fontweight='bold')
    ax.set_xticks(x)
    # str() guards against non-string names (e.g. numeric or missing cells).
    ax.set_xticklabels([str(name)[:20] for name in corrected['site_name']],
                       rotation=30, ha='right', fontsize=9)
    ax.legend(fontsize=10)
    # Leave 20% headroom above the tallest bar of either series.
    tallest = corrected[['mu', 'mu_tilde']].max().max()
    ax.set_ylim(0, tallest * 1.2)
    style_axes(ax, grid_axis="y")

    plt.tight_layout()
    plt.savefig(FIGURES_PATH / 'fig2_demand_correction.png', dpi=150, bbox_inches='tight')
    plt.close()
def fig3_k_distribution():
    """Fig.3: distribution of visit frequencies and their relation to demand.

    Reads ``ALLOCATE_PATH`` and draws two panels: (a) a histogram of ``k``
    with unit-width bins plus mean/median markers, and (b) a scatter of ``k``
    against ``mu_tilde`` with a first-degree least-squares fit and the Pearson
    correlation coefficient. Saved as ``fig3_k_distribution.png`` in
    ``FIGURES_PATH``.
    """
    print(" 生成 Fig.3: 频次分配分布...")
    alloc = pd.read_excel(ALLOCATE_PATH)
    freq = alloc['k']
    demand = alloc['mu_tilde']  # corrected demand is already in the allocate file

    fig, (ax_hist, ax_rel) = plt.subplots(1, 2, figsize=(14, 5))

    # Left panel: histogram of k, one bin centred on every integer value.
    edges = np.arange(freq.min() - 0.5, freq.max() + 1.5, 1)
    ax_hist.hist(freq, bins=edges, color=TU["blue_mid"], edgecolor="white", alpha=0.85)
    k_mean = freq.mean()
    k_median = freq.median()
    ax_hist.axvline(k_mean, color=TU["mauve"], linestyle='--', linewidth=2, label=f'Mean = {k_mean:.1f}')
    ax_hist.axvline(k_median, color=TU["olive"], linestyle=':', linewidth=2, label=f'Median = {k_median:.0f}')
    ax_hist.set_xlabel('Visit Frequency (k)', fontsize=12)
    ax_hist.set_ylabel('Number of Sites', fontsize=12)
    ax_hist.set_title('(a) Distribution of Visit Frequencies', fontsize=12)
    ax_hist.legend(fontsize=10)
    style_axes(ax_hist)

    # Right panel: k against corrected demand.
    ax_rel.scatter(demand, freq, alpha=0.75, s=65, c=TU["green"], edgecolors='white', linewidths=0.7)

    # Straight-line fit evaluated on 100 evenly spaced demand values.
    coeffs = np.polyfit(demand, freq, 1)
    fitted = np.poly1d(coeffs)
    grid_x = np.linspace(demand.min(), demand.max(), 100)
    ax_rel.plot(
        grid_x,
        fitted(grid_x),
        linestyle='--',
        color=TU["blue_dark"],
        linewidth=2,
        label=f'Linear fit: k = {coeffs[0]:.3f}μ̃ + {coeffs[1]:.1f}',
    )

    # Correlation coefficient shown in a rounded box in the top-left corner.
    pearson_r = np.corrcoef(demand, freq)[0, 1]
    box_style = dict(boxstyle='round', facecolor="#f3f4f6", edgecolor=TU["gray"], alpha=0.95)
    ax_rel.text(0.05, 0.95, f'r = {pearson_r:.4f}', transform=ax_rel.transAxes, fontsize=11,
                verticalalignment='top', bbox=box_style)
    ax_rel.set_xlabel('Corrected Demand (μ̃)', fontsize=12)
    ax_rel.set_ylabel('Visit Frequency (k)', fontsize=12)
    ax_rel.set_title('(b) k vs μ̃ (Proportionality Check)', fontsize=12)
    ax_rel.legend(fontsize=10)
    style_axes(ax_rel)

    plt.suptitle('Fig.3: Visit Frequency Allocation Analysis', fontsize=14, fontweight='bold', y=1.02)
    plt.tight_layout()
    plt.savefig(FIGURES_PATH / 'fig3_k_distribution.png', dpi=150, bbox_inches='tight')
    plt.close()
def fig4_efficiency_fairness():
    """Fig.4: efficiency-fairness scatter of the compared allocation methods.

    Reads the ``metrics_summary`` sheet of ``METRICS_PATH`` and plots one
    marker per row at (``E2_quality_weighted``, ``F1_gini``), annotated with
    its E1/E2/Gini values. Marker colour, shape and label offset are chosen by
    the prefix of the ``method`` name. Saved as
    ``fig4_efficiency_fairness.png`` in ``FIGURES_PATH``.
    """
    print(" 生成 Fig.4: 有效性-公平性权衡...")
    summary = pd.read_excel(METRICS_PATH, sheet_name='metrics_summary')
    fig, ax = plt.subplots(figsize=(8, 4.96))

    from matplotlib.lines import Line2D

    # One preset per expected method prefix: (prefix, colour, marker, label
    # offset in points). Offsets keep the boxes of neighbouring points apart
    # and stop markers from covering the text.
    presets = (
        ("Recommended", TU["blue_dark"], "o", (16, 14)),
        ("Baseline 1", TU["mauve"], "s", (-8, -18)),
        ("Baseline 2", TU["olive"], "^", (10, -10)),
        ("Baseline 3", TU["orange"], "D", (-22, 10)),
    )
    # Unrecognised methods: grey circle, labelled with the "Recommended" offset.
    fallback = (None, TU["gray"], "o", (16, 14))

    label_box = dict(boxstyle="round,pad=0.28", facecolor="#f3f4f6", edgecolor=TU["gray"], alpha=0.96)
    label_arrow = dict(arrowstyle="->", color=TU["gray"], lw=1.0, shrinkA=6, shrinkB=6)

    legend_handles = []
    for _, row in summary.iterrows():
        method = str(row["method"])
        _, color, marker, (dx, dy) = next(
            (preset for preset in presets if method.startswith(preset[0])),
            fallback,
        )
        x = float(row["E2_quality_weighted"])
        y = float(row["F1_gini"])

        ax.scatter(x, y, s=220, c=color, marker=marker,
                   edgecolors="white", linewidths=1.2, zorder=4)

        # The text is anchored on the side of the box that faces the marker.
        horizontal = "left" if dx >= 0 else "right"
        vertical = "bottom" if dy >= 0 else "top"
        ax.annotate(
            f"E1={row['E1_total_service']:.0f}\nE2={row['E2_quality_weighted']:.0f}\nGini={row['F1_gini']:.3f}",
            (x, y),
            xytext=(dx, dy),
            textcoords="offset points",
            fontsize=9,
            ha=horizontal,
            va=vertical,
            bbox=label_box,
            arrowprops=label_arrow,
            zorder=6,
        )

        # Proxy artist so the legend shows the marker without a line.
        legend_handles.append(
            Line2D([0], [0], marker=marker, color="none",
                   markerfacecolor=color, markeredgecolor=TU["gray"],
                   markeredgewidth=1.0, markersize=11, label=method)
        )

    # Double-headed arrow and caption illustrating the trade-off direction
    # (positions are fixed data coordinates).
    ax.annotate('', xy=(135000, 0.05), xytext=(105000, 0.30),
                arrowprops=dict(arrowstyle='<->', color=TU["mauve"], lw=2))
    caption_box = dict(facecolor='white', edgecolor='none', alpha=0.8, pad=2)
    ax.text(115000, 0.20, 'Efficiency-Fairness\nTradeoff', fontsize=10, ha='center',
            color=TU["mauve"], style='italic', bbox=caption_box, zorder=10)

    ax.set_xlabel('E2 (Quality-Weighted Service Volume)', fontsize=12)
    ax.set_ylabel('F1 (Gini Coefficient, lower = fairer)', fontsize=12)
    ax.set_title('Fig.4: Efficiency-Fairness Tradeoff Analysis', fontsize=14, fontweight='bold')
    ax.legend(
        handles=legend_handles,
        loc="upper left",
        fontsize=9.5,
        labelspacing=0.6,
        borderpad=0.6,
        handletextpad=0.6,
        framealpha=0.92,
    )
    style_axes(ax)

    # Fixed axis ranges.
    ax.set_xlim(95000, 140000)
    ax.set_ylim(0, 0.40)

    plt.tight_layout()
    plt.savefig(FIGURES_PATH / 'fig4_efficiency_fairness.png', dpi=150, bbox_inches='tight')
    plt.close()
# Month lengths of a 365-day (non-leap) year, January first.
_DAYS_PER_MONTH = (31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)


def _calendar_cell(day_of_year, month_lengths=_DAYS_PER_MONTH):
    """Map a 1-based day-of-year to 0-based ``(month, day_of_month)`` indices.

    Returns ``None`` for missing values and for days outside the year, so the
    caller can skip them instead of indexing the wrong heat-map cell.
    """
    if pd.isna(day_of_year):
        return None
    remaining = int(day_of_year) - 1
    if remaining < 0:
        return None
    for month, length in enumerate(month_lengths):
        if remaining < length:
            return month, remaining
        remaining -= length
    return None


def fig5_calendar_heatmap():
    """Fig.5: month-by-day heat map of the scheduled daily demand.

    Reads the ``calendar`` sheet of ``SCHEDULE_PATH`` and the ``mu`` column of
    ``ALLOCATE_PATH``. Each day's value is the sum of ``mu`` over the sites in
    ``site_1_id`` / ``site_2_id`` (empty slots and unknown ids count as 0).
    The values fill a 12 x 31 grid whose rows are labelled Jan-Dec and whose
    columns are labelled 1-31. Cells with no scheduled day stay NaN. Saved as
    ``fig5_calendar_heatmap.png`` in ``FIGURES_PATH``.

    Days are placed using real month lengths rather than fixed 31-day blocks,
    so every cell matches its month/day axis labels.
    NOTE(review): assumes ``day`` is a 1-based day-of-year index in a 365-day
    year; confirm against the writer of 05_schedule.xlsx.
    """
    print(" 生成 Fig.5: 日历热力图...")
    df_calendar = pd.read_excel(SCHEDULE_PATH, sheet_name='calendar')
    df_allocate = pd.read_excel(ALLOCATE_PATH)

    # site_id -> mu lookup
    mu_map = dict(zip(df_allocate['site_id'], df_allocate['mu']))

    def _slot_demand(site_id):
        # An empty slot contributes nothing; so does an id missing from mu_map.
        if pd.isna(site_id):
            return 0
        return mu_map.get(int(site_id), 0)

    # Total demand served on each scheduled day (two site slots per day).
    df_calendar['total_demand'] = [
        _slot_demand(first) + _slot_demand(second)
        for first, second in zip(df_calendar['site_1_id'], df_calendar['site_2_id'])
    ]

    # 12 x 31 matrix; cells without a scheduled day (e.g. "Feb 30") stay NaN.
    heatmap_data = np.full((12, 31), np.nan)
    for day, demand in zip(df_calendar['day'], df_calendar['total_demand']):
        cell = _calendar_cell(day)
        if cell is not None:
            heatmap_data[cell] = demand

    fig, ax = plt.subplots(figsize=(14, 8))
    im = ax.imshow(heatmap_data, cmap=_cmap_heat(), aspect='auto', interpolation='nearest')

    # Colour bar
    cbar = plt.colorbar(im, ax=ax, shrink=0.8)
    cbar.set_label('Daily Total Demand (μ₁ + μ₂)', fontsize=11, color=TU["text"])

    # Axis ticks and labels
    ax.set_xticks(np.arange(31))
    ax.set_xticklabels(np.arange(1, 32), fontsize=8)
    ax.set_yticks(np.arange(12))
    ax.set_yticklabels(['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                        'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'], fontsize=10)
    ax.set_xlabel('Day of Month', fontsize=12)
    ax.set_ylabel('Month', fontsize=12)
    ax.set_title('Fig.5: Annual Schedule Calendar Heatmap (Daily Demand)', fontsize=14, fontweight='bold')
    ax.grid(False)  # the theme turns grids on globally; not wanted over an image

    plt.tight_layout()
    plt.savefig(FIGURES_PATH / 'fig5_calendar_heatmap.png', dpi=150, bbox_inches='tight')
    plt.close()
def fig6_gap_boxplot():
    """Fig.6: visit-interval analysis (box plot by k group + CV histogram).

    Reads the ``gap_statistics`` sheet of ``SCHEDULE_PATH`` and keeps rows
    with a non-missing ``gap_mean``. Panel (a) shows box plots of ``gap_mean``
    per visit-frequency group; panel (b) shows a histogram of ``gap_cv`` with
    mean/median markers. Saved as ``fig6_gap_boxplot.png`` in ``FIGURES_PATH``.

    Only groups that contain data are drawn, and each box keeps the label and
    colour of its own group. The last group is open-ended, so sites with
    k > 40 are counted in "21+" instead of being dropped.
    """
    print(" 生成 Fig.6: 访问间隔箱线图...")
    df_gaps = pd.read_excel(SCHEDULE_PATH, sheet_name='gap_statistics')

    # Keep only sites that have a defined mean gap.
    df_valid = df_gaps[df_gaps['gap_mean'].notna()].copy()

    # Bucket sites by visit frequency; intervals are right-closed: (0, 5], ...
    group_labels = ['1-5', '6-10', '11-15', '16-20', '21+']
    df_valid['k_group'] = pd.cut(df_valid['k'], bins=[0, 5, 10, 15, 20, np.inf],
                                 labels=group_labels)

    fig, axes = plt.subplots(1, 2, figsize=(14, 6))

    # Left panel: mean gap per frequency group.
    ax1 = axes[0]
    # One colour per *possible* group, so a group's colour does not depend on
    # which other groups happen to be empty.
    palette = _cmap_k()(np.linspace(0.2, 0.9, len(group_labels)))
    populated = []
    for label, color in zip(group_labels, palette):
        values = df_valid.loc[df_valid['k_group'] == label, 'gap_mean'].tolist()
        if values:
            populated.append((label, values, color))

    if populated:
        bp = ax1.boxplot([values for _, values, _ in populated], patch_artist=True)
        # Boxes sit at positions 1..n by default; label them explicitly
        # instead of relying on boxplot's version-dependent label keyword.
        ax1.set_xticks(list(range(1, len(populated) + 1)))
        ax1.set_xticklabels([label for label, _, _ in populated])
        for patch, (_, _, color) in zip(bp['boxes'], populated):
            patch.set_facecolor(color)
            patch.set_edgecolor("white")
            patch.set_linewidth(0.8)
    ax1.set_xlabel('Visit Frequency Group (k)', fontsize=12)
    ax1.set_ylabel('Mean Gap (days)', fontsize=12)
    ax1.set_title('(a) Mean Visit Interval by Frequency Group', fontsize=12)
    style_axes(ax1)

    # Right panel: distribution of the gap coefficient of variation.
    ax2 = axes[1]
    gap_cv = df_valid['gap_cv']
    cv_mean = gap_cv.mean()
    cv_median = gap_cv.median()
    ax2.hist(gap_cv, bins=20, color=TU["blue_mid"], edgecolor="white", alpha=0.85)
    ax2.axvline(cv_mean, color=TU["mauve"], linestyle='--', linewidth=2,
                label=f'Mean CV = {cv_mean:.3f}')
    ax2.axvline(cv_median, color=TU["olive"], linestyle=':', linewidth=2,
                label=f'Median CV = {cv_median:.3f}')
    ax2.set_xlabel('Coefficient of Variation (CV) of Gaps', fontsize=12)
    ax2.set_ylabel('Number of Sites', fontsize=12)
    ax2.set_title('(b) Distribution of Gap Regularity (CV)', fontsize=12)
    ax2.legend(fontsize=10)
    style_axes(ax2)

    plt.suptitle('Fig.6: Visit Interval Analysis', fontsize=14, fontweight='bold', y=1.02)
    plt.tight_layout()
    plt.savefig(FIGURES_PATH / 'fig6_gap_boxplot.png', dpi=150, bbox_inches='tight')
    plt.close()
def fig7_sensitivity():
    """Fig.7: 2 x 2 grid of sensitivity plots for the model parameters.

    Reads four sheets of ``SENSITIVITY_PATH``: ``sensitivity_C``,
    ``sensitivity_p_thresh``, ``sensitivity_c_bar`` and ``baseline`` (first
    row only). Panels: (a) E1 vs C with the baseline E1, (b) number of
    corrected sites per C, (c) E1 vs p_thresh, (d) E2 vs c_bar with the
    baseline E2. Saved as ``fig7_sensitivity.png`` in ``FIGURES_PATH``.
    """
    print(" 生成 Fig.7: 敏感性分析...")

    # Sweep results, one sheet per varied parameter.
    sweep_C = pd.read_excel(SENSITIVITY_PATH, sheet_name='sensitivity_C')
    sweep_p = pd.read_excel(SENSITIVITY_PATH, sheet_name='sensitivity_p_thresh')
    sweep_cbar = pd.read_excel(SENSITIVITY_PATH, sheet_name='sensitivity_c_bar')

    # Baseline parameter values and metrics come from the first baseline row.
    baseline = pd.read_excel(SENSITIVITY_PATH, sheet_name='baseline').iloc[0]
    base_C = int(baseline['C'])
    base_p_thresh = float(baseline['p_thresh'])
    base_c_bar = float(baseline['c_bar'])
    base_E1 = float(baseline['E1'])
    base_E2 = float(baseline['E2'])

    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    panel_a = axes[0, 0]
    panel_b = axes[0, 1]
    panel_c = axes[1, 0]
    panel_d = axes[1, 1]
    line_style = dict(linewidth=2, markersize=7)
    baseline_style = dict(color=TU["taupe"], linestyle='--', alpha=0.9)

    # (a) Effect of C on E1
    panel_a.plot(sweep_C['C'], sweep_C['E1'], 'o-', color=TU["blue_dark"], **line_style)
    panel_a.axhline(base_E1, label=f'Baseline (C={base_C}, p={base_p_thresh:g})', **baseline_style)
    panel_a.set_xlabel('Effective Capacity (C)', fontsize=11)
    panel_a.set_ylabel('E1 (Total Service Volume)', fontsize=11)
    panel_a.set_title('(a) Effect of C on E1', fontsize=12)
    panel_a.legend(fontsize=9)
    style_axes(panel_a)

    # (b) Effect of C on the number of corrected sites (C shown as categories)
    capacity_labels = sweep_C['C'].astype(str)
    panel_b.bar(capacity_labels, sweep_C['n_corrected'], color=TU["green"],
                edgecolor="white", alpha=0.9, linewidth=0.7)
    panel_b.set_xlabel('Effective Capacity (C)', fontsize=11)
    panel_b.set_ylabel('Number of Corrected Sites', fontsize=11)
    panel_b.set_title('(b) Effect of C on Correction Count', fontsize=12)
    style_axes(panel_b, grid_axis="y")

    # (c) Effect of p_thresh on E1
    panel_c.plot(sweep_p['p_thresh'], sweep_p['E1'], 'o-', color=TU["teal"], label='E1', **line_style)
    panel_c.set_xlabel('Truncation Threshold (p_thresh)', fontsize=11)
    panel_c.set_ylabel('E1 (Total Service Volume)', fontsize=11)
    panel_c.set_title('(c) Effect of p_thresh on E1', fontsize=12)
    panel_c.legend(fontsize=9)
    style_axes(panel_c)

    # (d) Effect of c_bar on E2
    panel_d.plot(sweep_cbar['c_bar'], sweep_cbar['E2'], 's-', color=TU["mauve"], label='E2', **line_style)
    panel_d.axhline(base_E2, label=f'Baseline (c̄={base_c_bar:g})', **baseline_style)
    panel_d.set_xlabel('Quality Threshold (c̄)', fontsize=11)
    panel_d.set_ylabel('E2 (Quality-Weighted Service)', fontsize=11)
    panel_d.set_title('(d) Effect of c̄ on E2', fontsize=12)
    panel_d.legend(fontsize=9)
    style_axes(panel_d)

    plt.suptitle('Fig.7: Sensitivity Analysis of Model Parameters', fontsize=14, fontweight='bold', y=1.02)
    plt.tight_layout()
    plt.savefig(FIGURES_PATH / 'fig7_sensitivity.png', dpi=150, bbox_inches='tight')
    plt.close()
def main():
    """Run the whole visualization step.

    Applies the theme, refreshes the interactive-map data file, renders
    Fig.1 to Fig.7 in order and finally lists every PNG found in
    ``FIGURES_PATH``.
    """
    banner = "=" * 60
    print(banner)
    print("Step 09: 可视化")
    print(banner)
    print(f"\n输出目录: {FIGURES_PATH}")

    # Generate all figures.
    print("\n[1] 生成图表...")
    apply_tu_theme()
    js_path = export_fig1_points_js()
    print(f" 已更新交互地图数据: {js_path.name}")
    renderers = (
        fig1_site_map,
        fig2_demand_correction,
        fig3_k_distribution,
        fig4_efficiency_fairness,
        fig5_calendar_heatmap,
        fig6_gap_boxplot,
        fig7_sensitivity,
    )
    for render in renderers:
        render()

    # List the generated files.
    print("\n[2] 已生成图表:")
    for png in sorted(FIGURES_PATH.glob('*.png')):
        print(f" {png.name}")

    print("\n" + banner)
    print("Step 09 完成")
    print(banner)


if __name__ == "__main__":
    main()