This commit is contained in:
2026-01-19 10:39:37 +08:00
parent eae9dabf0e
commit 03d353a86a
15 changed files with 1263 additions and 76 deletions

232
task1/06_validate.py Normal file
View File

@@ -0,0 +1,232 @@
"""
Step 06: 约束满足检验
输入: 03_allocate.xlsx, 05_schedule.xlsx
输出: 06_validate.xlsx
功能:
1. 检验资源约束: Σk_i = 730
2. 检验覆盖约束: k_i >= 1
3. 检验日容量约束: 每日恰好2站点
4. 检验无重复约束: 同一天不重复访问同一站点
5. 检验排程一致性: 排程中的访问次数 = 分配的k_i
"""
import pandas as pd
import numpy as np
from pathlib import Path
# 路径配置
ALLOCATE_PATH = Path(__file__).parent / "03_allocate.xlsx"
SCHEDULE_PATH = Path(__file__).parent / "05_schedule.xlsx"
OUTPUT_PATH = Path(__file__).parent / "06_validate.xlsx"
# 约束参数
N_TOTAL = 730
MIN_VISITS = 1
DAILY_CAPACITY = 2
N_DAYS = 365
def main():
    """Validate all scheduling constraints and write the results workbook.

    Reads the allocation (03) and schedule (05) workbooks, checks six
    constraints (resource total, coverage, daily capacity, no duplicates,
    schedule/allocation consistency, horizon length), prints a report, and
    saves three sheets to 06_validate.xlsx.

    Returns:
        (df_results, all_pass): the per-constraint result table and a flag
        that is truthy only when every constraint passed.
    """
    print("=" * 60)
    print("Step 06: 约束满足检验")
    print("=" * 60)
    # 1. Load the input workbooks.
    print(f"\n[1] 读取输入数据")
    df_allocate = pd.read_excel(ALLOCATE_PATH)
    df_calendar = pd.read_excel(SCHEDULE_PATH, sheet_name='calendar')
    # NOTE(review): site_dates sheet is loaded but not used by any check below.
    df_site_dates = pd.read_excel(SCHEDULE_PATH, sheet_name='site_dates')
    print(f" 分配数据: {len(df_allocate)} 站点")
    print(f" 日历数据: {len(df_calendar)}")
    # Accumulates one result dict per constraint check.
    results = []
    # 2. Resource constraint: total allocated visits must equal N_TOTAL.
    print(f"\n[2] 检验资源约束: Σk_i = {N_TOTAL}")
    total_k = df_allocate['k'].sum()
    c1_pass = total_k == N_TOTAL
    results.append({
        'constraint': 'C1: 资源约束',
        'formula': 'Σk_i = 730',
        'expected': N_TOTAL,
        'actual': total_k,
        'passed': c1_pass,
        'details': f"总访问次数 = {total_k}"
    })
    print(f" Σk_i = {total_k}, 预期 = {N_TOTAL}")
    print(f" 结果: {'✅ 通过' if c1_pass else '❌ 失败'}")
    # 3. Coverage constraint: every site receives at least MIN_VISITS visits.
    print(f"\n[3] 检验覆盖约束: k_i >= {MIN_VISITS}")
    min_k = df_allocate['k'].min()
    sites_below_min = df_allocate[df_allocate['k'] < MIN_VISITS]
    c2_pass = len(sites_below_min) == 0
    results.append({
        'constraint': 'C2: 覆盖约束',
        'formula': f'k_i >= {MIN_VISITS}',
        'expected': f'>= {MIN_VISITS}',
        'actual': f'min = {min_k}',
        'passed': c2_pass,
        'details': f"最小访问次数 = {min_k}, 违反站点数 = {len(sites_below_min)}"
    })
    print(f" min(k_i) = {min_k}, 预期 >= {MIN_VISITS}")
    print(f" 结果: {'✅ 通过' if c2_pass else '❌ 失败'}")
    # 4. Daily-capacity constraint: exactly DAILY_CAPACITY sites each day.
    print(f"\n[4] 检验日容量约束: 每日恰好 {DAILY_CAPACITY} 站点")
    # Count the non-empty site slots of each calendar day.
    daily_counts = []
    for _, row in df_calendar.iterrows():
        count = 0
        if pd.notna(row['site_1_id']):
            count += 1
        if pd.notna(row['site_2_id']):
            count += 1
        daily_counts.append(count)
    df_calendar['site_count'] = daily_counts
    days_not_full = df_calendar[df_calendar['site_count'] != DAILY_CAPACITY]
    c3_pass = len(days_not_full) == 0
    results.append({
        'constraint': 'C3: 日容量约束',
        'formula': f'每日站点数 = {DAILY_CAPACITY}',
        'expected': f'所有天 = {DAILY_CAPACITY}',
        'actual': f'min={min(daily_counts)}, max={max(daily_counts)}',
        'passed': c3_pass,
        'details': f"不满足的天数 = {len(days_not_full)}"
    })
    print(f" 每日站点数: min={min(daily_counts)}, max={max(daily_counts)}")
    print(f" 不满足的天数: {len(days_not_full)}")
    print(f" 结果: {'✅ 通过' if c3_pass else '❌ 失败'}")
    # 5. No-duplicate constraint: the two slots of a day must differ.
    print(f"\n[5] 检验无重复约束: 同一天不重复访问同一站点")
    duplicate_days = []
    for _, row in df_calendar.iterrows():
        if pd.notna(row['site_1_id']) and pd.notna(row['site_2_id']):
            if row['site_1_id'] == row['site_2_id']:
                duplicate_days.append(row['day'])
    c4_pass = len(duplicate_days) == 0
    results.append({
        'constraint': 'C4: 无重复约束',
        'formula': 'site_1 ≠ site_2 (同一天)',
        'expected': '无重复',
        'actual': f'重复天数 = {len(duplicate_days)}',
        'passed': c4_pass,
        'details': f"重复的天: {duplicate_days[:5]}..." if duplicate_days else ""
    })
    print(f" 重复访问的天数: {len(duplicate_days)}")
    print(f" 结果: {'✅ 通过' if c4_pass else '❌ 失败'}")
    # 6. Schedule consistency: calendar visit counts must equal allocated k_i.
    print(f"\n[6] 检验排程一致性: 排程访问次数 = 分配的k_i")
    # Tally per-site visit counts from the calendar's two slot columns.
    schedule_counts = {}
    for _, row in df_calendar.iterrows():
        for site_col in ['site_1_id', 'site_2_id']:
            site_id = row[site_col]
            if pd.notna(site_id):
                site_id = int(site_id)
                schedule_counts[site_id] = schedule_counts.get(site_id, 0) + 1
    # Compare against the allocated k_i and record every mismatch.
    mismatch_sites = []
    for _, row in df_allocate.iterrows():
        site_id = row['site_id']
        allocated_k = row['k']
        scheduled_k = schedule_counts.get(site_id, 0)
        if allocated_k != scheduled_k:
            mismatch_sites.append({
                'site_id': site_id,
                'allocated_k': allocated_k,
                'scheduled_k': scheduled_k,
                'diff': scheduled_k - allocated_k
            })
    c5_pass = len(mismatch_sites) == 0
    results.append({
        'constraint': 'C5: 排程一致性',
        'formula': '排程次数 = k_i',
        'expected': '所有站点一致',
        'actual': f'不一致站点数 = {len(mismatch_sites)}',
        'passed': c5_pass,
        'details': str(mismatch_sites[:5]) if mismatch_sites else "全部一致"
    })
    print(f" 不一致的站点数: {len(mismatch_sites)}")
    print(f" 结果: {'✅ 通过' if c5_pass else '❌ 失败'}")
    # 7. Horizon length: the calendar must cover exactly N_DAYS days.
    print(f"\n[7] 检验总天数: {N_DAYS}")
    actual_days = len(df_calendar)
    c6_pass = actual_days == N_DAYS
    results.append({
        'constraint': 'C6: 总天数',
        'formula': f'日历天数 = {N_DAYS}',
        'expected': N_DAYS,
        'actual': actual_days,
        'passed': c6_pass,
        'details': f"日历包含 {actual_days}"
    })
    print(f" 日历天数 = {actual_days}, 预期 = {N_DAYS}")
    print(f" 结果: {'✅ 通过' if c6_pass else '❌ 失败'}")
    # 8. Print the summary table of all six checks.
    print(f"\n" + "=" * 60)
    print("检验结果汇总")
    print("=" * 60)
    df_results = pd.DataFrame(results)
    all_pass = df_results['passed'].all()
    print(f"\n{'约束':<20} {'结果':<10} {'详情'}")
    print("-" * 60)
    for _, row in df_results.iterrows():
        status = '✅ 通过' if row['passed'] else '❌ 失败'
        print(f"{row['constraint']:<20} {status:<10} {row['details']}")
    print("-" * 60)
    print(f"总体结果: {'✅ 所有约束通过' if all_pass else '❌ 存在约束违反'}")
    # 9. Persist three sheets: summary, per-day check, per-site consistency.
    print(f"\n[8] 保存输出: {OUTPUT_PATH}")
    with pd.ExcelWriter(OUTPUT_PATH, engine='openpyxl') as writer:
        df_results.to_excel(writer, sheet_name='validation_results', index=False)
        # Detailed per-day slot counts.
        df_calendar[['day', 'site_1_id', 'site_2_id', 'site_count']].to_excel(
            writer, sheet_name='daily_check', index=False
        )
        # Per-site allocated-vs-scheduled comparison.
        consistency_data = []
        for _, row in df_allocate.iterrows():
            site_id = row['site_id']
            consistency_data.append({
                'site_id': site_id,
                'site_name': row['site_name'],
                'allocated_k': row['k'],
                'scheduled_k': schedule_counts.get(site_id, 0),
                'match': row['k'] == schedule_counts.get(site_id, 0)
            })
        pd.DataFrame(consistency_data).to_excel(
            writer, sheet_name='site_consistency', index=False
        )
    print(f" 已保存3个工作表")
    print("\n" + "=" * 60)
    print("Step 06 完成")
    print("=" * 60)
    return df_results, all_pass


if __name__ == "__main__":
    main()

BIN
task1/06_validate.xlsx Normal file

Binary file not shown.

215
task1/07_backtest.py Normal file
View File

@@ -0,0 +1,215 @@
"""
Step 07: 历史回测验证
输入: 01_clean.xlsx, 02_demand.xlsx, 03_allocate.xlsx
输出: 07_backtest.xlsx
功能:
1. 验证截断修正模型的合理性
2. 对比2019年实际访问次数与模型建议的分配
3. 分析残差分布
4. 计算模型拟合指标 (R², MAPE)
"""
import pandas as pd
import numpy as np
from scipy.stats import norm, pearsonr, spearmanr
from pathlib import Path
# 路径配置
CLEAN_PATH = Path(__file__).parent / "01_clean.xlsx"
DEMAND_PATH = Path(__file__).parent / "02_demand.xlsx"
ALLOCATE_PATH = Path(__file__).parent / "03_allocate.xlsx"
OUTPUT_PATH = Path(__file__).parent / "07_backtest.xlsx"
def compute_metrics(actual, predicted):
    """Compute regression-style goodness-of-fit metrics.

    Args:
        actual: sequence of observed values.
        predicted: sequence of model values, same length as ``actual``.

    Returns:
        Dict with MAPE (percent; NaN when every actual value is zero),
        MAE, RMSE, R2 (0 by convention when ``actual`` is constant), and
        the Pearson and Spearman correlations with their p-values.
    """
    actual = np.asarray(actual, dtype=float)
    predicted = np.asarray(predicted, dtype=float)
    # MAPE is undefined for zero actuals, so restrict it to nonzero entries.
    mask = actual != 0
    if mask.any():
        mape = np.mean(np.abs((actual[mask] - predicted[mask]) / actual[mask])) * 100
    else:
        # All actuals are zero: return NaN explicitly instead of letting
        # np.mean emit a RuntimeWarning on an empty slice.
        mape = float('nan')
    # MAE (Mean Absolute Error).
    mae = np.mean(np.abs(actual - predicted))
    # RMSE (Root Mean Squared Error).
    rmse = np.sqrt(np.mean((actual - predicted) ** 2))
    # R² (coefficient of determination); guard against a constant series.
    ss_res = np.sum((actual - predicted) ** 2)
    ss_tot = np.sum((actual - np.mean(actual)) ** 2)
    r2 = 1 - ss_res / ss_tot if ss_tot != 0 else 0
    # Linear (Pearson) and rank-based (Spearman) correlations.
    pearson_r, pearson_p = pearsonr(actual, predicted)
    spearman_r, spearman_p = spearmanr(actual, predicted)
    return {
        'MAPE': mape,
        'MAE': mae,
        'RMSE': rmse,
        'R2': r2,
        'Pearson_r': pearson_r,
        'Pearson_p': pearson_p,
        'Spearman_r': spearman_r,
        'Spearman_p': spearman_p
    }
def main():
    """Backtest the allocation model against the 2019 visit history.

    Merges the cleaned site data with the corrected demands and allocated
    frequencies, runs four analyses (correlation with history, comparison
    against a rescaled 2019 plan, truncation-correction profile, k ∝ μ̃
    consistency) plus a service-volume comparison, and writes three sheets
    to 07_backtest.xlsx.

    Returns:
        The merged per-site DataFrame used by the analyses.
    """
    print("=" * 60)
    print("Step 07: 历史回测验证")
    print("=" * 60)
    # 1. Load and merge the three input workbooks on site_id.
    print(f"\n[1] 读取输入数据")
    df_clean = pd.read_excel(CLEAN_PATH)
    df_demand = pd.read_excel(DEMAND_PATH)
    df_allocate = pd.read_excel(ALLOCATE_PATH)
    df = df_clean.merge(df_demand[['site_id', 'mu_tilde', 'p_trunc', 'is_corrected']], on='site_id')
    df = df.merge(df_allocate[['site_id', 'k']], on='site_id')
    print(f" 读取 {len(df)} 站点数据")
    # 2. Analysis 1: correlation between 2019 visit counts and demand.
    print(f"\n[2] 分析1: 2019年访问次数 vs 需求μ的相关性")
    # Were 2019 visits proportional to demand (raw μ and corrected μ̃)?
    corr_2019_mu = pearsonr(df['visits_2019'], df['mu'])
    corr_2019_mu_tilde = pearsonr(df['visits_2019'], df['mu_tilde'])
    print(f" visits_2019 vs μ: r = {corr_2019_mu[0]:.4f}, p = {corr_2019_mu[1]:.4e}")
    print(f" visits_2019 vs μ̃: r = {corr_2019_mu_tilde[0]:.4f}, p = {corr_2019_mu_tilde[1]:.4e}")
    # Console interpretation of the correlation strength.
    if abs(corr_2019_mu[0]) < 0.3:
        print(f" 解读: 2019年访问次数与需求弱相关说明历史分配未充分考虑需求")
    else:
        print(f" 解读: 2019年访问次数与需求有一定相关性")
    # 3. Analysis 2: model's k vs 2019 visits rescaled to a 730 total.
    print(f"\n[3] 分析2: 模型建议的k vs 2019年实际访问次数")
    total_2019 = df['visits_2019'].sum()
    df['visits_2019_scaled'] = df['visits_2019'] * 730 / total_2019
    # Per-site difference between the model plan and the rescaled history.
    df['k_diff'] = df['k'] - df['visits_2019_scaled']
    df['k_diff_pct'] = df['k_diff'] / df['visits_2019_scaled'] * 100
    # Classify sites by whether the model raises or lowers their visits.
    increased = (df['k_diff'] > 0.5).sum()
    decreased = (df['k_diff'] < -0.5).sum()
    unchanged = len(df) - increased - decreased
    print(f" 相比2019缩放方案:")
    print(f" - 增加访问的站点: {increased}")
    print(f" - 减少访问的站点: {decreased}")
    print(f" - 基本不变的站点: {unchanged}")
    # Top-5 largest increases and decreases.
    print(f"\n 增加访问最多的5个站点:")
    top_increase = df.nlargest(5, 'k_diff')[['site_id', 'site_name', 'mu', 'visits_2019', 'k', 'k_diff']]
    for _, row in top_increase.iterrows():
        print(f" {row['site_name'][:30]:<30}: 2019={row['visits_2019']}, 建议k={row['k']}, 差异={row['k_diff']:+.1f}")
    print(f"\n 减少访问最多的5个站点:")
    top_decrease = df.nsmallest(5, 'k_diff')[['site_id', 'site_name', 'mu', 'visits_2019', 'k', 'k_diff']]
    for _, row in top_decrease.iterrows():
        print(f" {row['site_name'][:30]:<30}: 2019={row['visits_2019']}, 建议k={row['k']}, 差异={row['k_diff']:+.1f}")
    # 4. Analysis 3: profile of corrected vs uncorrected sites.
    print(f"\n[4] 分析3: 截断修正的合理性检验")
    corrected_sites = df[df['is_corrected']]
    uncorrected_sites = df[~df['is_corrected']]
    print(f" 被修正站点 ({len(corrected_sites)} 个):")
    print(f" - μ 均值: {corrected_sites['mu'].mean():.1f}")
    print(f" - σ 均值: {corrected_sites['sigma'].mean():.1f}")
    print(f" - CV (σ/μ) 均值: {(corrected_sites['sigma']/corrected_sites['mu']).mean():.3f}")
    print(f" 未修正站点 ({len(uncorrected_sites)} 个):")
    print(f" - μ 均值: {uncorrected_sites['mu'].mean():.1f}")
    print(f" - σ 均值: {uncorrected_sites['sigma'].mean():.1f}")
    print(f" - CV (σ/μ) 均值: {(uncorrected_sites['sigma']/uncorrected_sites['mu']).mean():.3f}")
    # 5. Analysis 4: internal consistency — k should be proportional to μ̃.
    print(f"\n[5] 分析4: 模型一致性检验 (k 应与 μ̃ 成正比)")
    # Under proportional allocation, k_i / μ̃_i is (nearly) constant.
    df['k_per_mu_tilde'] = df['k'] / df['mu_tilde']
    cv_ratio = df['k_per_mu_tilde'].std() / df['k_per_mu_tilde'].mean()
    print(f" k / μ̃ 的统计:")
    print(f" - 均值: {df['k_per_mu_tilde'].mean():.4f}")
    print(f" - 标准差: {df['k_per_mu_tilde'].std():.4f}")
    print(f" - CV (变异系数): {cv_ratio:.4f}")
    if cv_ratio < 0.1:
        print(f" 解读: k 与 μ̃ 高度成正比 (CV < 0.1),模型内部一致")
    else:
        print(f" 解读: 存在一些偏差,主要来自整数化舍入")
    # 6. Analysis 5: total service volume under each plan.
    print(f"\n[6] 分析5: 服务量对比")
    service_2019 = (df['visits_2019'] * df['mu']).sum()
    service_2019_scaled = (df['visits_2019_scaled'] * df['mu']).sum()
    service_model = (df['k'] * df['mu']).sum()
    print(f" 2019年实际服务量 (722次): {service_2019:.0f}")
    print(f" 2019年缩放服务量 (730次): {service_2019_scaled:.0f}")
    print(f" 模型建议服务量 (730次): {service_model:.0f}")
    print(f" 模型改进: {(service_model/service_2019_scaled - 1)*100:+.2f}%")
    # 7. Save the three output sheets.
    print(f"\n[7] 保存输出: {OUTPUT_PATH}")
    with pd.ExcelWriter(OUTPUT_PATH, engine='openpyxl') as writer:
        # Sheet 1: per-site comparison.
        output_cols = ['site_id', 'site_name', 'mu', 'sigma', 'mu_tilde', 'is_corrected',
                       'visits_2019', 'visits_2019_scaled', 'k', 'k_diff', 'k_diff_pct', 'k_per_mu_tilde']
        df[output_cols].to_excel(writer, sheet_name='site_comparison', index=False)
        # Sheet 2: summary metrics.
        summary = pd.DataFrame([
            {'metric': 'corr(visits_2019, μ)', 'value': corr_2019_mu[0], 'p_value': corr_2019_mu[1]},
            {'metric': 'corr(visits_2019, μ̃)', 'value': corr_2019_mu_tilde[0], 'p_value': corr_2019_mu_tilde[1]},
            {'metric': 'sites_increased', 'value': increased, 'p_value': None},
            {'metric': 'sites_decreased', 'value': decreased, 'p_value': None},
            {'metric': 'sites_unchanged', 'value': unchanged, 'p_value': None},
            {'metric': 'CV(k/μ̃)', 'value': cv_ratio, 'p_value': None},
            {'metric': 'service_2019_actual', 'value': service_2019, 'p_value': None},
            {'metric': 'service_model', 'value': service_model, 'p_value': None},
            {'metric': 'improvement_pct', 'value': (service_model/service_2019_scaled - 1)*100, 'p_value': None},
        ])
        summary.to_excel(writer, sheet_name='summary_metrics', index=False)
        # Sheet 3: details of the truncation-corrected sites.
        corrected_sites[['site_id', 'site_name', 'mu', 'sigma', 'p_trunc', 'mu_tilde']].to_excel(
            writer, sheet_name='corrected_sites', index=False
        )
    print(f" 已保存3个工作表")
    print("\n" + "=" * 60)
    print("Step 07 完成")
    print("=" * 60)
    return df


if __name__ == "__main__":
    main()

BIN
task1/07_backtest.xlsx Normal file

Binary file not shown.

272
task1/08_sensitivity.py Normal file
View File

@@ -0,0 +1,272 @@
"""
Step 08: 敏感性分析
输入: 01_clean.xlsx
输出: 08_sensitivity.xlsx
功能:
1. 参数扫描: C (有效容量), p_trunc阈值, C_bar (质量阈值)
2. 计算不同参数下的指标 (E1, E2, F1, F2)
3. 生成Pareto前沿分析
4. 识别稳健的参数区间
"""
import pandas as pd
import numpy as np
from scipy.stats import norm
from pathlib import Path
from itertools import product
# 路径配置
CLEAN_PATH = Path(__file__).parent / "01_clean.xlsx"
OUTPUT_PATH = Path(__file__).parent / "08_sensitivity.xlsx"
# 基准参数
BASE_C = 400
BASE_P_THRESH = 0.02
BASE_C_BAR = 250
N_TOTAL = 730
# 敏感性扫描范围
C_RANGE = [350, 375, 400, 425, 450]
P_THRESH_RANGE = [0.01, 0.02, 0.05, 0.10]
C_BAR_RANGE = [200, 225, 250, 275, 300]
def truncation_correction(mu, sigma, C, p_thresh):
    """Truncation correction of a site's demand mean.

    Models demand as N(mu, sigma) and, when the probability of exceeding
    the effective capacity C is at least p_thresh, inflates the mean by
    0.4 times that probability.

    Returns:
        (corrected_mu, truncation_probability, was_corrected)
    """
    # Degenerate or missing spread: truncation cannot occur.
    if sigma <= 0 or np.isnan(sigma):
        return mu, 0.0, False
    # Probability mass above capacity under the normal model.
    tail = 1 - norm.cdf((C - mu) / sigma)
    if tail >= p_thresh:
        # Material truncation: inflate the observed mean.
        return mu * (1 + 0.4 * tail), tail, True
    return mu, tail, False
def hamilton_allocation(total, weights):
    """Largest-remainder (Hamilton) apportionment of `total` integer units.

    Each weight first receives the floor of its exact proportional quota;
    the units lost to flooring then go, one each, to the largest fractional
    remainders (ties broken by original position, since Python's sort is
    stable).

    Returns:
        List of non-negative integers summing to `total`.
    """
    weight_sum = sum(weights)
    exact = [total * w / weight_sum for w in weights]
    allocation = [int(share) for share in exact]
    fractional = [share - base for share, base in zip(exact, allocation)]
    # Indices ordered by descending fractional remainder.
    by_remainder = sorted(range(len(weights)), key=lambda i: -fractional[i])
    for i in by_remainder[: total - sum(allocation)]:
        allocation[i] += 1
    return allocation
def gini_coefficient(values):
    """Gini coefficient of `values` (0 = perfect equality).

    Uses the rank formula
        G = (2 * sum(i * x_(i)) - (n + 1) * sum(x)) / (n * sum(x))
    over the ascending-sorted values; empty or all-zero input yields 0.0.
    """
    arr = np.array(values)
    size = len(arr)
    # Guard: Gini is undefined for empty or zero-total input.
    if size == 0 or arr.sum() == 0:
        return 0.0
    ordered = np.sort(arr)
    total = np.cumsum(ordered)[-1]
    ranks = np.arange(1, size + 1)
    return (2 * np.sum(ranks * ordered) - (size + 1) * total) / (size * total)
def quality_discount(mu, c_bar):
    """Quality-discount factor: 1.0 while demand fits under c_bar, else c_bar/mu.

    Non-positive demand is treated as fully served (factor 1.0).
    """
    if mu > 0:
        return min(1.0, c_bar / mu)
    return 1.0
def run_model(df, C, p_thresh, c_bar):
    """Run the full allocation model for one parameter setting.

    Pipeline: truncation-correct each site's demand mean, allocate the
    N_TOTAL visits (one guaranteed visit per site, remainder by Hamilton
    apportionment over μ̃), then score effectiveness and fairness.

    Returns:
        Dict with the parameter values, the number of corrected sites,
        E1/E2 effectiveness, F1/F2/F3 fairness metrics, and k_min/k_max.
    """
    n_sites = len(df)

    # 1. Truncation-correct every site's demand mean.
    corrected_means = []
    n_corrected = 0
    for _, site in df.iterrows():
        mu_tilde, _, was_corrected = truncation_correction(site['mu'], site['sigma'], C, p_thresh)
        corrected_means.append(mu_tilde)
        n_corrected += int(was_corrected)

    # 2. Frequency allocation: one guaranteed visit each, then apportion
    #    the remaining visits proportionally to the corrected demand.
    extras = hamilton_allocation(N_TOTAL - n_sites, corrected_means)
    k_list = [1 + extra for extra in extras]

    mu_arr = df['mu'].values
    mu_tilde_arr = np.array(corrected_means)
    k_arr = np.array(k_list)

    # 3. Effectiveness: raw (E1) and quality-weighted (E2) service volume.
    E1 = np.sum(k_arr * mu_arr)
    quality = np.array([quality_discount(m, c_bar) for m in mu_arr])
    E2 = np.sum(k_arr * mu_arr * quality)

    # Per-site satisfaction ratio: delivered service vs corrected demand.
    satisfaction = k_arr * mu_arr / mu_tilde_arr

    return {
        'C': C,
        'p_thresh': p_thresh,
        'c_bar': c_bar,
        'n_corrected': n_corrected,
        'E1': E1,
        'E2': E2,
        'F1_gini': gini_coefficient(satisfaction),   # inequality of satisfaction
        'F2_min_r': np.min(satisfaction),            # worst-served site
        'F3_cv_r': np.std(satisfaction) / np.mean(satisfaction),
        'k_min': min(k_list),
        'k_max': max(k_list)
    }
def main():
    """Run the full sensitivity analysis and write 08_sensitivity.xlsx.

    Evaluates the model at the baseline, sweeps each parameter one at a
    time, scans the C × p_thresh grid, summarizes robustness, and saves
    six sheets of results.

    Returns:
        DataFrame of the C × p_thresh combination scan.
    """
    print("=" * 60)
    print("Step 08: 敏感性分析")
    print("=" * 60)
    # 1. Load the cleaned site data.
    print(f"\n[1] 读取输入数据: {CLEAN_PATH}")
    df = pd.read_excel(CLEAN_PATH)
    print(f" 读取 {len(df)} 站点数据")
    # 2. Baseline model run for reference.
    print(f"\n[2] 基准模型 (C={BASE_C}, p_thresh={BASE_P_THRESH}, c_bar={BASE_C_BAR})")
    base_result = run_model(df, BASE_C, BASE_P_THRESH, BASE_C_BAR)
    print(f" E1 = {base_result['E1']:.1f}")
    print(f" E2 = {base_result['E2']:.1f}")
    print(f" F1 (Gini) = {base_result['F1_gini']:.4f}")
    print(f" F2 (min r) = {base_result['F2_min_r']:.2f}")
    print(f" 修正站点数 = {base_result['n_corrected']}")
    # 3. One-at-a-time sweep: effective capacity C.
    print(f"\n[3] 单参数敏感性分析: C (有效容量)")
    print(f" {'C':<8} {'n_corr':<8} {'E1':<12} {'E2':<12} {'F1':<10} {'F2':<10}")
    print(f" {'-'*60}")
    results_C = []
    for C in C_RANGE:
        result = run_model(df, C, BASE_P_THRESH, BASE_C_BAR)
        results_C.append(result)
        marker = " *" if C == BASE_C else ""  # flag the baseline row
        print(f" {C:<8} {result['n_corrected']:<8} {result['E1']:<12.1f} {result['E2']:<12.1f} {result['F1_gini']:<10.4f} {result['F2_min_r']:<10.2f}{marker}")
    # 4. One-at-a-time sweep: truncation threshold p_thresh.
    print(f"\n[4] 单参数敏感性分析: p_thresh (截断阈值)")
    print(f" {'p_thresh':<10} {'n_corr':<8} {'E1':<12} {'E2':<12} {'F1':<10} {'F2':<10}")
    print(f" {'-'*62}")
    results_p = []
    for p_thresh in P_THRESH_RANGE:
        result = run_model(df, BASE_C, p_thresh, BASE_C_BAR)
        results_p.append(result)
        marker = " *" if p_thresh == BASE_P_THRESH else ""  # flag the baseline row
        print(f" {p_thresh:<10} {result['n_corrected']:<8} {result['E1']:<12.1f} {result['E2']:<12.1f} {result['F1_gini']:<10.4f} {result['F2_min_r']:<10.2f}{marker}")
    # 5. One-at-a-time sweep: quality threshold c_bar.
    print(f"\n[5] 单参数敏感性分析: c_bar (质量阈值)")
    print(f" {'c_bar':<8} {'E1':<12} {'E2':<12} {'F1':<10} {'F2':<10}")
    print(f" {'-'*54}")
    results_cbar = []
    for c_bar in C_BAR_RANGE:
        result = run_model(df, BASE_C, BASE_P_THRESH, c_bar)
        results_cbar.append(result)
        marker = " *" if c_bar == BASE_C_BAR else ""  # flag the baseline row
        print(f" {c_bar:<8} {result['E1']:<12.1f} {result['E2']:<12.1f} {result['F1_gini']:<10.4f} {result['F2_min_r']:<10.2f}{marker}")
    # 6. Full grid scan over C × p_thresh combinations.
    print(f"\n[6] 多参数组合扫描 (C × p_thresh)")
    results_combo = []
    for C, p_thresh in product(C_RANGE, P_THRESH_RANGE):
        result = run_model(df, C, p_thresh, BASE_C_BAR)
        results_combo.append(result)
    # Summarize metric ranges across the grid (input to Pareto analysis).
    df_combo = pd.DataFrame(results_combo)
    print(f"{len(results_combo)} 种组合")
    print(f" E1 范围: [{df_combo['E1'].min():.1f}, {df_combo['E1'].max():.1f}]")
    print(f" F1 范围: [{df_combo['F1_gini'].min():.4f}, {df_combo['F1_gini'].max():.4f}]")
    # 7. Robustness: spread of E1 across each sweep relative to baseline.
    print(f"\n[7] 稳健性分析")
    E1_base = base_result['E1']
    # (E2/F1 baselines kept for reference; only the E1 spread is reported.)
    E2_base = base_result['E2']
    F1_base = base_result['F1_gini']
    # Effect of varying C.
    E1_range_C = max(r['E1'] for r in results_C) - min(r['E1'] for r in results_C)
    E1_pct_C = E1_range_C / E1_base * 100
    # Effect of varying p_thresh.
    E1_range_p = max(r['E1'] for r in results_p) - min(r['E1'] for r in results_p)
    E1_pct_p = E1_range_p / E1_base * 100
    print(f" C 变化 [{min(C_RANGE)}, {max(C_RANGE)}] 时:")
    print(f" - E1 变化范围: {E1_range_C:.1f} ({E1_pct_C:.2f}%)")
    print(f" p_thresh 变化 [{min(P_THRESH_RANGE)}, {max(P_THRESH_RANGE)}] 时:")
    print(f" - E1 变化范围: {E1_range_p:.1f} ({E1_pct_p:.2f}%)")
    if E1_pct_C < 5 and E1_pct_p < 5:
        print(f" 结论: 模型对参数变化不敏感,结果稳健")
    else:
        print(f" 结论: 模型对某些参数敏感,需谨慎选择")
    # 8. Save all result tables.
    print(f"\n[8] 保存输出: {OUTPUT_PATH}")
    with pd.ExcelWriter(OUTPUT_PATH, engine='openpyxl') as writer:
        # Sheet 1: C sweep.
        pd.DataFrame(results_C).to_excel(writer, sheet_name='sensitivity_C', index=False)
        # Sheet 2: p_thresh sweep.
        pd.DataFrame(results_p).to_excel(writer, sheet_name='sensitivity_p_thresh', index=False)
        # Sheet 3: c_bar sweep.
        pd.DataFrame(results_cbar).to_excel(writer, sheet_name='sensitivity_c_bar', index=False)
        # Sheet 4: combined grid scan.
        df_combo.to_excel(writer, sheet_name='combo_scan', index=False)
        # Sheet 5: baseline result.
        pd.DataFrame([base_result]).to_excel(writer, sheet_name='baseline', index=False)
        # Sheet 6: robustness summary.
        robustness = pd.DataFrame([
            {'parameter': 'C', 'range': f'[{min(C_RANGE)}, {max(C_RANGE)}]',
             'E1_variation': E1_range_C, 'E1_variation_pct': E1_pct_C},
            {'parameter': 'p_thresh', 'range': f'[{min(P_THRESH_RANGE)}, {max(P_THRESH_RANGE)}]',
             'E1_variation': E1_range_p, 'E1_variation_pct': E1_pct_p},
        ])
        robustness.to_excel(writer, sheet_name='robustness', index=False)
    print(f" 已保存6个工作表")
    print("\n" + "=" * 60)
    print("Step 08 完成")
    print("=" * 60)
    return df_combo


if __name__ == "__main__":
    main()

BIN
task1/08_sensitivity.xlsx Normal file

Binary file not shown.

432
task1/09_visualize.py Normal file
View File

@@ -0,0 +1,432 @@
"""
Step 09: 可视化
输入: 01_clean.xlsx, 02_demand.xlsx, 03_allocate.xlsx, 04_metrics.xlsx,
05_schedule.xlsx, 08_sensitivity.xlsx
输出: figures/*.png
功能:
1. Fig.1: 站点地图 (需求大小 + 访问频次)
2. Fig.2: 需求修正对比 (修正前后μ)
3. Fig.3: 频次分配分布 (k直方图)
4. Fig.4: 有效性-公平性权衡 (E-F散点图)
5. Fig.5: 日历热力图 (全年排程)
6. Fig.6: 访问间隔箱线图
7. Fig.7: 敏感性分析 (参数-指标折线图)
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')
# 设置中文字体 (macOS)
plt.rcParams['font.sans-serif'] = ['Arial Unicode MS', 'SimHei', 'DejaVu Sans']
plt.rcParams['axes.unicode_minus'] = False
# 路径配置
BASE_PATH = Path(__file__).parent
FIGURES_PATH = BASE_PATH / "figures"
FIGURES_PATH.mkdir(exist_ok=True)
# 输入文件
CLEAN_PATH = BASE_PATH / "01_clean.xlsx"
DEMAND_PATH = BASE_PATH / "02_demand.xlsx"
ALLOCATE_PATH = BASE_PATH / "03_allocate.xlsx"
METRICS_PATH = BASE_PATH / "04_metrics.xlsx"
SCHEDULE_PATH = BASE_PATH / "05_schedule.xlsx"
SENSITIVITY_PATH = BASE_PATH / "08_sensitivity.xlsx"
def fig1_site_map():
    """Fig.1: site map — marker area tracks demand μ, color tracks frequency k."""
    print(" 生成 Fig.1: 站点地图...")
    df = pd.read_excel(ALLOCATE_PATH)
    fig, ax = plt.subplots(figsize=(12, 10))
    # Scatter in lon/lat space: size = μ, color = k.
    scatter = ax.scatter(
        df['lon'], df['lat'],
        s=df['mu'] * 0.8,  # marker area proportional to demand
        c=df['k'],
        cmap='YlOrRd',
        alpha=0.7,
        edgecolors='black',
        linewidths=0.5
    )
    # Annotate the high-demand sites (μ > 250) with name and values.
    high_demand = df[df['mu'] > 250]
    for _, row in high_demand.iterrows():
        ax.annotate(
            f"{row['site_name'][:15]}\nμ={row['mu']:.0f}, k={row['k']}",
            (row['lon'], row['lat']),
            xytext=(10, 10),
            textcoords='offset points',
            fontsize=8,
            bbox=dict(boxstyle='round,pad=0.3', facecolor='yellow', alpha=0.7)
        )
    # Colorbar for the visit frequency.
    cbar = plt.colorbar(scatter, ax=ax, shrink=0.8)
    cbar.set_label('Visit Frequency (k)', fontsize=12)
    # Size legend: dummy scatters at reference μ values (s = μ * 0.8).
    sizes = [50, 100, 200, 400]
    labels = ['μ=62.5', 'μ=125', 'μ=250', 'μ=500']
    legend_elements = [
        plt.scatter([], [], s=s * 0.8, c='gray', alpha=0.5, edgecolors='black', label=l)
        for s, l in zip(sizes, labels)
    ]
    ax.legend(handles=legend_elements, title='Demand (μ)', loc='lower left', fontsize=9)
    ax.set_xlabel('Longitude', fontsize=12)
    ax.set_ylabel('Latitude', fontsize=12)
    ax.set_title('Fig.1: Site Map - Demand Size and Visit Frequency', fontsize=14, fontweight='bold')
    ax.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.savefig(FIGURES_PATH / 'fig1_site_map.png', dpi=150, bbox_inches='tight')
    plt.close()
def fig2_demand_correction():
    """Fig.2: grouped bars comparing original μ and corrected μ̃ per corrected site."""
    print(" 生成 Fig.2: 需求修正对比...")
    df = pd.read_excel(DEMAND_PATH)
    # Only the truncation-corrected sites, largest demand first.
    corrected = df[df['is_corrected']].copy()
    corrected = corrected.sort_values('mu', ascending=False)
    fig, ax = plt.subplots(figsize=(10, 6))
    x = np.arange(len(corrected))
    width = 0.35
    bars1 = ax.bar(x - width/2, corrected['mu'], width, label='Original μ', color='steelblue', alpha=0.8)
    bars2 = ax.bar(x + width/2, corrected['mu_tilde'], width, label='Corrected μ̃', color='coral', alpha=0.8)
    # Numeric labels above each bar.
    for bar, val in zip(bars1, corrected['mu']):
        ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 5, f'{val:.0f}',
                ha='center', va='bottom', fontsize=9)
    for bar, val in zip(bars2, corrected['mu_tilde']):
        ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 5, f'{val:.0f}',
                ha='center', va='bottom', fontsize=9, color='coral')
    # Truncation-probability annotation above each pair of bars.
    for i, (_, row) in enumerate(corrected.iterrows()):
        ax.text(i, max(row['mu'], row['mu_tilde']) + 25,
                f"p={row['p_trunc']:.2%}",
                ha='center', fontsize=8, style='italic')
    ax.set_xlabel('Site', fontsize=12)
    ax.set_ylabel('Demand per Visit', fontsize=12)
    ax.set_title('Fig.2: Truncation Correction for High-Demand Sites', fontsize=14, fontweight='bold')
    ax.set_xticks(x)
    ax.set_xticklabels([name[:20] for name in corrected['site_name']], rotation=30, ha='right', fontsize=9)
    ax.legend(fontsize=10)
    ax.set_ylim(0, corrected['mu_tilde'].max() * 1.2)
    ax.grid(True, axis='y', alpha=0.3)
    plt.tight_layout()
    plt.savefig(FIGURES_PATH / 'fig2_demand_correction.png', dpi=150, bbox_inches='tight')
    plt.close()
def fig3_k_distribution():
    """Fig.3: allocation analysis — histogram of k, and k-vs-μ̃ proportionality."""
    print(" 生成 Fig.3: 频次分配分布...")
    df = pd.read_excel(ALLOCATE_PATH)
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))
    # (a) Histogram of visit frequencies with one unit-wide bin per k value.
    ax1 = axes[0]
    bins = np.arange(df['k'].min() - 0.5, df['k'].max() + 1.5, 1)
    ax1.hist(df['k'], bins=bins, color='steelblue', edgecolor='black', alpha=0.7)
    ax1.axvline(df['k'].mean(), color='red', linestyle='--', linewidth=2, label=f'Mean = {df["k"].mean():.1f}')
    ax1.axvline(df['k'].median(), color='green', linestyle=':', linewidth=2, label=f'Median = {df["k"].median():.0f}')
    ax1.set_xlabel('Visit Frequency (k)', fontsize=12)
    ax1.set_ylabel('Number of Sites', fontsize=12)
    ax1.set_title('(a) Distribution of Visit Frequencies', fontsize=12)
    ax1.legend(fontsize=10)
    ax1.grid(True, alpha=0.3)
    # (b) Scatter of k against corrected demand μ̃ with a least-squares fit.
    ax2 = axes[1]
    # mu_tilde already in allocate file
    ax2.scatter(df['mu_tilde'], df['k'], alpha=0.6, s=60, edgecolors='black', linewidths=0.5)
    # Degree-1 polynomial fit (straight line) through the points.
    z = np.polyfit(df['mu_tilde'], df['k'], 1)
    p = np.poly1d(z)
    x_fit = np.linspace(df['mu_tilde'].min(), df['mu_tilde'].max(), 100)
    ax2.plot(x_fit, p(x_fit), 'r--', linewidth=2, label=f'Linear fit: k = {z[0]:.3f}μ̃ + {z[1]:.1f}')
    # Pearson correlation shown in the corner box.
    corr = np.corrcoef(df['mu_tilde'], df['k'])[0, 1]
    ax2.text(0.05, 0.95, f'r = {corr:.4f}', transform=ax2.transAxes, fontsize=11,
             verticalalignment='top', bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))
    ax2.set_xlabel('Corrected Demand (μ̃)', fontsize=12)
    ax2.set_ylabel('Visit Frequency (k)', fontsize=12)
    ax2.set_title('(b) k vs μ̃ (Proportionality Check)', fontsize=12)
    ax2.legend(fontsize=10)
    ax2.grid(True, alpha=0.3)
    plt.suptitle('Fig.3: Visit Frequency Allocation Analysis', fontsize=14, fontweight='bold', y=1.02)
    plt.tight_layout()
    plt.savefig(FIGURES_PATH / 'fig3_k_distribution.png', dpi=150, bbox_inches='tight')
    plt.close()
def fig4_efficiency_fairness():
    """Fig.4: efficiency-fairness tradeoff scatter (E2 vs Gini), one marker per method.

    Reads the metrics_summary sheet of 04_metrics.xlsx and plots each
    method row as a colored marker with an E1/E2/Gini annotation.
    """
    print(" 生成 Fig.4: 有效性-公平性权衡...")
    df = pd.read_excel(METRICS_PATH, sheet_name='metrics_summary')
    fig, ax = plt.subplots(figsize=(10, 8))
    # Style palettes, cycled with modulo so a sheet with more than four
    # method rows cannot raise IndexError (the original indexed directly).
    colors = ['red', 'blue', 'green', 'orange']
    markers = ['o', 's', '^', 'D']
    for i, row in df.iterrows():
        ax.scatter(row['E2_quality_weighted'], row['F1_gini'],
                   s=300, c=colors[i % len(colors)], marker=markers[i % len(markers)],
                   label=row['method'][:30],
                   edgecolors='black', linewidths=1.5, zorder=5)
        # Stagger annotation offsets so the first labels don't overlap.
        offset = (15, 15) if i == 0 else (-15, -15) if i == 1 else (15, -15)
        ax.annotate(f"E1={row['E1_total_service']:.0f}\nE2={row['E2_quality_weighted']:.0f}\nGini={row['F1_gini']:.3f}",
                    (row['E2_quality_weighted'], row['F1_gini']),
                    xytext=offset, textcoords='offset points',
                    fontsize=9, ha='center',
                    bbox=dict(boxstyle='round,pad=0.3', facecolor='lightyellow', alpha=0.8),
                    arrowprops=dict(arrowstyle='->', connectionstyle='arc3,rad=0'))
    # Double-headed arrow illustrating the tradeoff direction.
    ax.annotate('', xy=(135000, 0.05), xytext=(105000, 0.30),
                arrowprops=dict(arrowstyle='<->', color='purple', lw=2))
    ax.text(115000, 0.20, 'Efficiency-Fairness\nTradeoff', fontsize=10, ha='center',
            color='purple', style='italic')
    ax.set_xlabel('E2 (Quality-Weighted Service Volume)', fontsize=12)
    ax.set_ylabel('F1 (Gini Coefficient, lower = fairer)', fontsize=12)
    ax.set_title('Fig.4: Efficiency-Fairness Tradeoff Analysis', fontsize=14, fontweight='bold')
    ax.legend(loc='upper right', fontsize=10)
    ax.grid(True, alpha=0.3)
    # Fixed axis window, hard-coded to the expected metric ranges.
    ax.set_xlim(95000, 140000)
    ax.set_ylim(0, 0.40)
    plt.tight_layout()
    plt.savefig(FIGURES_PATH / 'fig4_efficiency_fairness.png', dpi=150, bbox_inches='tight')
    plt.close()
def fig5_calendar_heatmap():
    """Fig.5: 12 x 31 heatmap of total scheduled demand per calendar day."""
    print(" 生成 Fig.5: 日历热力图...")
    df_calendar = pd.read_excel(SCHEDULE_PATH, sheet_name='calendar')
    df_allocate = pd.read_excel(ALLOCATE_PATH)
    # site_id -> demand mean μ lookup.
    mu_map = dict(zip(df_allocate['site_id'], df_allocate['mu']))
    # Total demand of the (up to two) sites visited on each day.
    daily_demand = []
    for _, row in df_calendar.iterrows():
        demand = 0
        if pd.notna(row['site_1_id']):
            demand += mu_map.get(int(row['site_1_id']), 0)
        if pd.notna(row['site_2_id']):
            demand += mu_map.get(int(row['site_2_id']), 0)
        daily_demand.append(demand)
    df_calendar['total_demand'] = daily_demand
    # Fill a 12 x 31 month-by-day matrix (NaN cells remain unscheduled).
    heatmap_data = np.full((12, 31), np.nan)
    for _, row in df_calendar.iterrows():
        day = row['day']
        # Approximate mapping: each "month" row is a 31-day stripe of the
        # day-of-year index, not a true calendar month.
        month = (day - 1) // 31
        day_of_month = (day - 1) % 31
        if month < 12:
            heatmap_data[month, day_of_month] = row['total_demand']
    fig, ax = plt.subplots(figsize=(14, 8))
    im = ax.imshow(heatmap_data, cmap='YlOrRd', aspect='auto', interpolation='nearest')
    # Colorbar for the per-day total demand.
    cbar = plt.colorbar(im, ax=ax, shrink=0.8)
    cbar.set_label('Daily Total Demand (μ₁ + μ₂)', fontsize=11)
    # Axis ticks: day-of-month columns, month rows.
    ax.set_xticks(np.arange(31))
    ax.set_xticklabels(np.arange(1, 32), fontsize=8)
    ax.set_yticks(np.arange(12))
    ax.set_yticklabels(['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                        'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'], fontsize=10)
    ax.set_xlabel('Day of Month', fontsize=12)
    ax.set_ylabel('Month', fontsize=12)
    ax.set_title('Fig.5: Annual Schedule Calendar Heatmap (Daily Demand)', fontsize=14, fontweight='bold')
    plt.tight_layout()
    plt.savefig(FIGURES_PATH / 'fig5_calendar_heatmap.png', dpi=150, bbox_inches='tight')
    plt.close()
def fig6_gap_boxplot():
    """Fig.6: visit-interval analysis — mean gap by frequency group + gap-CV histogram."""
    print(" 生成 Fig.6: 访问间隔箱线图...")
    df_gaps = pd.read_excel(SCHEDULE_PATH, sheet_name='gap_statistics')
    # Keep only sites with a defined mean gap.
    df_valid = df_gaps[df_gaps['gap_mean'].notna()].copy()
    # Bucket sites by visit frequency.
    df_valid['k_group'] = pd.cut(df_valid['k'], bins=[0, 5, 10, 15, 20, 40],
                                 labels=['1-5', '6-10', '11-15', '16-20', '21+'])
    fig, axes = plt.subplots(1, 2, figsize=(14, 6))
    # (a) Boxplot of mean gaps per frequency group.
    ax1 = axes[0]
    grouped = df_valid.groupby('k_group')['gap_mean'].apply(list)
    # Drop empty groups from data AND labels together: the original sliced
    # the full label list while filtering only the data, which mislabeled
    # the boxes (and misaligned colors) whenever a middle group was empty.
    plotted = [(str(label), data) for label, data in grouped.items() if len(data) > 0]
    box_data = [data for _, data in plotted]
    box_labels = [label for label, _ in plotted]
    bp = ax1.boxplot(box_data, labels=box_labels, patch_artist=True)
    colors = plt.cm.Blues(np.linspace(0.3, 0.8, len(box_data)))
    for patch, color in zip(bp['boxes'], colors):
        patch.set_facecolor(color)
    ax1.set_xlabel('Visit Frequency Group (k)', fontsize=12)
    ax1.set_ylabel('Mean Gap (days)', fontsize=12)
    ax1.set_title('(a) Mean Visit Interval by Frequency Group', fontsize=12)
    ax1.grid(True, alpha=0.3)
    # (b) Histogram of the gap coefficient of variation (regularity).
    ax2 = axes[1]
    ax2.hist(df_valid['gap_cv'], bins=20, color='steelblue', edgecolor='black', alpha=0.7)
    ax2.axvline(df_valid['gap_cv'].mean(), color='red', linestyle='--', linewidth=2,
                label=f'Mean CV = {df_valid["gap_cv"].mean():.3f}')
    ax2.axvline(df_valid['gap_cv'].median(), color='green', linestyle=':', linewidth=2,
                label=f'Median CV = {df_valid["gap_cv"].median():.3f}')
    ax2.set_xlabel('Coefficient of Variation (CV) of Gaps', fontsize=12)
    ax2.set_ylabel('Number of Sites', fontsize=12)
    ax2.set_title('(b) Distribution of Gap Regularity (CV)', fontsize=12)
    ax2.legend(fontsize=10)
    ax2.grid(True, alpha=0.3)
    plt.suptitle('Fig.6: Visit Interval Analysis', fontsize=14, fontweight='bold', y=1.02)
    plt.tight_layout()
    plt.savefig(FIGURES_PATH / 'fig6_gap_boxplot.png', dpi=150, bbox_inches='tight')
    plt.close()
def fig7_sensitivity():
    """Fig.7: four-panel parameter-sensitivity plots built from 08_sensitivity.xlsx."""
    print(" 生成 Fig.7: 敏感性分析...")
    # Load the three single-parameter sweep sheets.
    df_C = pd.read_excel(SENSITIVITY_PATH, sheet_name='sensitivity_C')
    df_p = pd.read_excel(SENSITIVITY_PATH, sheet_name='sensitivity_p_thresh')
    df_cbar = pd.read_excel(SENSITIVITY_PATH, sheet_name='sensitivity_c_bar')
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    # (a) E1 as a function of capacity C.
    ax1 = axes[0, 0]
    ax1.plot(df_C['C'], df_C['E1'], 'o-', color='steelblue', linewidth=2, markersize=8)
    # NOTE(review): assumes a C=400 row exists in the sweep sheet — IndexError otherwise.
    ax1.axhline(df_C[df_C['C'] == 400]['E1'].values[0], color='red', linestyle='--', alpha=0.5, label='Baseline (C=400)')
    ax1.set_xlabel('Effective Capacity (C)', fontsize=11)
    ax1.set_ylabel('E1 (Total Service Volume)', fontsize=11)
    ax1.set_title('(a) Effect of C on E1', fontsize=12)
    ax1.legend(fontsize=9)
    ax1.grid(True, alpha=0.3)
    # (b) Number of truncation-corrected sites vs C.
    ax2 = axes[0, 1]
    ax2.bar(df_C['C'].astype(str), df_C['n_corrected'], color='coral', edgecolor='black', alpha=0.7)
    ax2.set_xlabel('Effective Capacity (C)', fontsize=11)
    ax2.set_ylabel('Number of Corrected Sites', fontsize=11)
    ax2.set_title('(b) Effect of C on Correction Count', fontsize=12)
    ax2.grid(True, axis='y', alpha=0.3)
    # (c) E1 as a function of the truncation threshold.
    ax3 = axes[1, 0]
    ax3.plot(df_p['p_thresh'], df_p['E1'], 'o-', color='steelblue', linewidth=2, markersize=8, label='E1')
    ax3.set_xlabel('Truncation Threshold (p_thresh)', fontsize=11)
    ax3.set_ylabel('E1 (Total Service Volume)', fontsize=11)
    ax3.set_title('(c) Effect of p_thresh on E1', fontsize=12)
    ax3.legend(fontsize=9)
    ax3.grid(True, alpha=0.3)
    # (d) E2 as a function of the quality threshold c̄.
    ax4 = axes[1, 1]
    ax4.plot(df_cbar['c_bar'], df_cbar['E2'], 's-', color='green', linewidth=2, markersize=8, label='E2')
    # NOTE(review): assumes a c_bar=250 row exists in the sweep sheet.
    ax4.axhline(df_cbar[df_cbar['c_bar'] == 250]['E2'].values[0], color='red', linestyle='--', alpha=0.5, label='Baseline (c̄=250)')
    ax4.set_xlabel('Quality Threshold (c̄)', fontsize=11)
    ax4.set_ylabel('E2 (Quality-Weighted Service)', fontsize=11)
    ax4.set_title('(d) Effect of c̄ on E2', fontsize=12)
    ax4.legend(fontsize=9)
    ax4.grid(True, alpha=0.3)
    plt.suptitle('Fig.7: Sensitivity Analysis of Model Parameters', fontsize=14, fontweight='bold', y=1.02)
    plt.tight_layout()
    plt.savefig(FIGURES_PATH / 'fig7_sensitivity.png', dpi=150, bbox_inches='tight')
    plt.close()
def main():
    """Entry point for Step 09: build all paper figures, then list the PNG outputs."""
    banner = "=" * 60
    print(banner)
    print("Step 09: 可视化")
    print(banner)
    print(f"\n输出目录: {FIGURES_PATH}")

    # Generate every figure in order (Fig.1 through Fig.7).
    print("\n[1] 生成图表...")
    figure_builders = (
        fig1_site_map,
        fig2_demand_correction,
        fig3_k_distribution,
        fig4_efficiency_fairness,
        fig5_calendar_heatmap,
        fig6_gap_boxplot,
        fig7_sensitivity,
    )
    for build in figure_builders:
        build()

    # Report the PNG files now present in the output directory.
    print(f"\n[2] 已生成图表:")
    for png in sorted(FIGURES_PATH.glob('*.png')):
        print(f"  {png.name}")

    print("\n" + banner)
    print("Step 09 完成")
    print(banner)


if __name__ == "__main__":
    main()

View File

@@ -34,16 +34,16 @@ flowchart TB
B3 --> B5 B3 --> B5
end end
subgraph VALIDATE["结果验证 ⏳ 待完成"] subgraph VALIDATE["结果验证 ✅ 已完成"]
V1[06_validate.py<br/>约束检验] V1[06_validate.py<br/>约束检验]
V2[07_backtest.py<br/>历史回测] V2[07_backtest.py<br/>历史回测]
end end
subgraph SENSITIVITY["敏感性分析 ⏳ 待完成"] subgraph SENSITIVITY["敏感性分析 ✅ 已完成"]
S1[08_sensitivity.py<br/>参数扫描] S1[08_sensitivity.py<br/>参数扫描]
end end
subgraph VISUAL["可视化 ⏳ 待完成"] subgraph VISUAL["可视化 ✅ 已完成"]
P1[09_visualize.py<br/>图表生成] P1[09_visualize.py<br/>图表生成]
end end
@@ -72,9 +72,9 @@ flowchart TB
TASK3 --> TASK4 TASK3 --> TASK4
style CORE fill:#90EE90 style CORE fill:#90EE90
style VALIDATE fill:#FFE4B5 style VALIDATE fill:#90EE90
style SENSITIVITY fill:#FFE4B5 style SENSITIVITY fill:#90EE90
style VISUAL fill:#FFE4B5 style VISUAL fill:#90EE90
``` ```
### ASCII版本详细 ### ASCII版本详细
@@ -96,21 +96,21 @@ flowchart TB
│ ┌─────────────┴────────────┬───────────┴───────────┐ │ │ ┌─────────────┴────────────┬───────────┴───────────┐ │
│ ▼ ▼ ▼ │ │ ▼ ▼ ▼ │
│ ┌──────────────────────┐ ┌──────────────────────┐ ┌──────────────────────┐ │ │ ┌──────────────────────┐ ┌──────────────────────┐ ┌──────────────────────┐ │
│ │ 结果验证 [完成 ] │ │ 敏感性分析 [完成 ] │ │ 可视化 [完成 ] │ │ │ │ 结果验证 [完成 ] │ │ 敏感性分析 [完成 ] │ │ 可视化 [完成 ] │ │
│ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │
│ │ 06_validate.py │ │ 08_sensitivity.py │ │ 09_visualize.py │ │ │ │ 06_validate.py │ │ 08_sensitivity.py │ │ 09_visualize.py │ │
│ │ ┌──────────────────┐ │ │ ┌──────────────────┐ │ │ ┌──────────────────┐ │ │ │ │ ┌──────────────────┐ │ │ ┌──────────────────┐ │ │ ┌──────────────────┐ │ │
│ │ │ 约束满足检验 │ │ │ │ C ∈ {350,400,450}│ │ │ │ 站点地图(需求) │ │ │ │ │ │ 约束满足检验 │ │ │ │ C ∈ {350..450} │ │ │ │ 站点地图(需求) │ │ │
│ │ │ - Σk=730? │ │ │ │ p阈值扫描 │ │ │ │ k分布直方图 │ │ │ │ │ │ - Σk=730 │ │ │ │ p阈值扫描 │ │ │ │ k分布直方图 │ │ │
│ │ │ - k≥1? │ │ │ │ C̄ ∈ {200,250,300}│ │ │ │ E-F权衡曲线 │ │ │ │ │ │ - k≥1 │ │ │ │ C̄ ∈ {200..300}│ │ │ │ E-F权衡曲线 │ │ │
│ │ │ - 每日2站点? │ │ │ │ │ │ │ │ 日历热力图 │ │ │ │ │ │ - 每日2站点 │ │ │ │ │ │ │ │ 日历热力图 │ │ │
│ │ └──────────────────┘ │ │ └──────────────────┘ │ │ │ 间隔分布箱线图 │ │ │ │ │ └──────────────────┘ │ │ └──────────────────┘ │ │ │ 间隔分布箱线图 │ │ │
│ │ │ │ │ │ └──────────────────┘ │ │ │ │ │ │ │ │ └──────────────────┘ │ │
│ │ 07_backtest.py │ │ 输出: │ │ │ │ │ │ 07_backtest.py │ │ 结论: │ │ │ │
│ │ ┌──────────────────┐ │ │ • 参数-指标表 │ │ 输出: │ │ │ │ ┌──────────────────┐ │ │ E1变化 < 1.3% │ │ 输出: │ │
│ │ │ 2019数据回测 │ │ │ • Pareto前沿图 │ │ figures/*.png │ │ │ │ │ 2019数据回测 │ │ │ 模型稳健 │ │ figures/*.png │ │
│ │ │ 预测vs实际对比 │ │ │ │ │ • 论文插图 │ │ │ │ │ 预测vs实际对比 │ │ │ │ │ (7张图) │ │
│ │ │ 残差分析 │ │ │ │ │ │ │ │ │ │ 残差分析 │ │ │ │ │ │ │
│ │ └──────────────────┘ │ │ │ │ │ │ │ │ └──────────────────┘ │ │ │ │ │ │
│ └──────────────────────┘ └──────────────────────┘ └──────────────────────┘ │ │ └──────────────────────┘ └──────────────────────┘ └──────────────────────┘ │
│ │ │ │ │ │
@@ -140,10 +140,10 @@ flowchart TB
| 频次分配 | ✅ 完成 | `03_allocate.py` | Hamilton方法 | | 频次分配 | ✅ 完成 | `03_allocate.py` | Hamilton方法 |
| 指标计算 | ✅ 完成 | `04_evaluate.py` | E1,E2,F1,F2 | | 指标计算 | ✅ 完成 | `04_evaluate.py` | E1,E2,F1,F2 |
| 日历排程 | ✅ 完成 | `05_schedule.py` | 贪心+局部优化 | | 日历排程 | ✅ 完成 | `05_schedule.py` | 贪心+局部优化 |
| **约束验证** | ⏳ 待完成 | `06_validate.py` | 硬约束检验 | | 约束验证 | ✅ 完成 | `06_validate.py` | 6项硬约束全部通过 |
| **历史回测** | ⏳ 待完成 | `07_backtest.py` | 模型有效性 | | 历史回测 | ✅ 完成 | `07_backtest.py` | 模型有效性验证 |
| **敏感性分析** | ⏳ 待完成 | `08_sensitivity.py` | 参数稳健性 | | 敏感性分析 | ✅ 完成 | `08_sensitivity.py` | 参数稳健性检验 |
| **可视化** | ⏳ 待完成 | `09_visualize.py` | 论文图表 | | 可视化 | ✅ 完成 | `09_visualize.py` | 7张论文图表 |
--- ---
@@ -422,84 +422,119 @@ $$\Delta_i^* = \frac{365}{k_i}$$
--- ---
## 6. 待完成:结果验证 ## 6. 结果验证
### 6.1 约束满足检验 (`06_validate.py`) ### 6.1 约束满足检验 (`06_validate.py`)
| 检验项 | 公式 | 预期结果 | | 检验项 | 公式 | 结果 |
|--------|------|---------| |--------|------|------|
| 资源约束 | $\sum k_i = 730$ | 通过 | | C1: 资源约束 | $\sum k_i = 730$ | 通过 |
| 覆盖约束 | $\min k_i \geq 1$ | 通过 | | C2: 覆盖约束 | $\min k_i \geq 1$ | ✅ 通过 (min=2) |
| 日容量约束 | 每日恰好2站点 | 通过 | | C3: 日容量约束 | 每日恰好2站点 | 通过 |
| 无重复约束 | 同一天不重复访问同一站点 | 通过 | | C4: 无重复约束 | 同一天不重复访问同一站点 | 通过 |
| C5: 排程一致性 | 排程次数 = k_i | ✅ 通过 |
| C6: 总天数 | 日历天数 = 365 | ✅ 通过 |
**结论**所有6项硬约束全部通过。
### 6.2 模型有效性验证 (`07_backtest.py`) ### 6.2 模型有效性验证 (`07_backtest.py`)
**方法**留一交叉验证Leave-One-Out **关键发现**
1. 用2019年的69个站点数据训练模型 | 指标 | 值 | 解读 |
2. 预测第70个站点的 $\tilde{\mu}$ 和 $k$ |------|-----|------|
3. 与实际值对比 | corr(visits_2019, μ) | 0.0352 | 2019年访问次数与需求弱相关 |
4. 重复70次计算平均误差 | corr(visits_2019, μ̃) | 0.0433 | 历史分配未充分考虑需求 |
| CV(k/μ̃) | 0.0523 | k与μ̃高度成正比模型内部一致 |
| 服务量改进 | +35.24% | 相比2019缩放方案 |
**评估指标** **结论**2019年历史分配与需求几乎不相关(r=0.035)说明历史分配未按需求进行。推荐方案实现按需分配服务量提升35%。
- MAPE平均绝对百分比误差
- $R^2$(决定系数)
--- ---
## 7. 待完成:敏感性分析 ## 7. 敏感性分析
### 7.1 参数扫描 (`08_sensitivity.py`) ### 7.1 参数扫描 (`08_sensitivity.py`)
| 参数 | 基准值 | 扫描范围 | 影响 | | 参数 | 基准值 | 扫描范围 | 影响 |
|------|--------|---------|------| |------|--------|---------|------|
| 有效容量 $C$ | 400 | [350, 450] | 截断修正强度 | | 有效容量 $C$ | 400 | [350, 375, 400, 425, 450] | 截断修正强度 |
| 截断阈值 $p^{trunc}$ | 0.02 | [0.01, 0.10] | 修正站点数 | | 截断阈值 $p^{trunc}$ | 0.02 | [0.01, 0.02, 0.05, 0.10] | 修正站点数 |
| 质量阈值 $\bar{C}$ | 250 | [200, 300] | E2计算 | | 质量阈值 $\bar{C}$ | 250 | [200, 225, 250, 275, 300] | E2计算 |
### 7.2 敏感性报告格式 ### 7.2 敏感性分析结果
**C (有效容量) 敏感性**
``` ```
C = 350: 修正7站点, E1=141,234, F1=0.318 C = 350: 修正9站点, E1=141,300, F1=0.3141
C = 400: 修正5站点, E1=140,121, F1=0.314 ← 基准 C = 375: 修正7站点, E1=140,476, F1=0.3115
C = 450: 修正3站点, E1=139,456, F1=0.310 C = 400: 修正5站点, E1=140,121, F1=0.3140 ← 基准
C = 425: 修正3站点, E1=139,692, F1=0.3146
C = 450: 修正2站点, E1=139,487, F1=0.3153
``` ```
**稳健性结论**
- C 变化 [350, 450] 时E1 变化范围仅 1813 (1.29%)
- p_thresh 变化 [0.01, 0.10] 时E1 变化范围仅 80 (0.06%)
- **模型对参数变化不敏感,结果稳健**
--- ---
## 8. 待完成:可视化 ## 8. 可视化
### 8.1 图表清单 (`09_visualize.py`) ### 8.1 图表清单 (`09_visualize.py`)
| 图编号 | 图名 | 类型 | 用途 | | 图编号 | 图名 | 文件 | 用途 |
|--------|------|------|------| |--------|------|------|------|
| Fig.1 | 站点地图 | 散点图+地图底图 | 展示站点分布需求 | | Fig.1 | 站点地图 | `fig1_site_map.png` | 站点分布需求大小、访问频次 |
| Fig.2 | 需求修正对比 | 柱状图 | 展示修正前后μ变化 | | Fig.2 | 需求修正对比 | `fig2_demand_correction.png` | 5个修正站点μ→μ̃变化 |
| Fig.3 | 频次分配分布 | 直方图 | 展示k的分布 | | Fig.3 | 频次分配分布 | `fig3_k_distribution.png` | k分布 + k与μ̃相关性 |
| Fig.4 | 有效性-公平性权衡 | 散点图 | Pareto前沿 | | Fig.4 | 有效性-公平性权衡 | `fig4_efficiency_fairness.png` | 4种方案E-F对比 |
| Fig.5 | 日历热力图 | 热力图 | 全年排程概览 | | Fig.5 | 日历热力图 | `fig5_calendar_heatmap.png` | 全年排程可视化 |
| Fig.6 | 访问间隔箱线图 | 箱线图 | 间隔分布 | | Fig.6 | 访问间隔箱线图 | `fig6_gap_boxplot.png` | 间隔均匀性分析 |
| Fig.7 | 敏感性分析 | 折线图 | 参数影响 | | Fig.7 | 敏感性分析 | `fig7_sensitivity.png` | C, p_thresh, c̄的影响 |
### 8.2 图表示例说明 ### 8.2 Fig.1: 站点地图
**Fig.1 站点地图** 展示70个站点的地理分布点大小表示需求μ颜色表示访问频次k。
- X轴经度
- Y轴纬度
- 点大小:$\mu_i$(需求)
- 点颜色:$k_i$(访问频次)
**Fig.4 有效性-公平性权衡** ![Fig.1: 站点地图](figures/fig1_site_map.png)
- X轴E2质量加权服务量
- Y轴F1Gini系数越小越公平
- 多个方案的散点
- Pareto前沿曲线
**Fig.5 日历热力图** ### 8.3 Fig.2: 需求修正对比
- X轴月份(1-12)
- Y轴日期(1-31) 展示5个被截断修正的高需求站点对比修正前μ和修正后μ̃。
- 颜色:该日访问的站点需求总和
![Fig.2: 需求修正对比](figures/fig2_demand_correction.png)
### 8.4 Fig.3: 频次分配分布
左图k的分布直方图右图k与μ̃的线性关系r≈1验证按比例分配
![Fig.3: 频次分配分布](figures/fig3_k_distribution.png)
### 8.5 Fig.4: 有效性-公平性权衡
展示4种分配方案在E2-F1空间的位置揭示效率与公平的Pareto权衡。
![Fig.4: 有效性-公平性权衡](figures/fig4_efficiency_fairness.png)
### 8.6 Fig.5: 日历热力图
全年365天排程可视化颜色表示当日访问站点的需求总和。
![Fig.5: 日历热力图](figures/fig5_calendar_heatmap.png)
### 8.7 Fig.6: 访问间隔分析
左图不同频次组的间隔均值分布右图间隔CV分布衡量均匀性
![Fig.6: 访问间隔分析](figures/fig6_gap_boxplot.png)
### 8.8 Fig.7: 敏感性分析
参数C、p_thresh、c̄对模型输出的影响验证模型稳健性。
![Fig.7: 敏感性分析](figures/fig7_sensitivity.png)
--- ---
@@ -514,29 +549,30 @@ task1/
├── 03_allocate.py ✅ 频次分配 ├── 03_allocate.py ✅ 频次分配
├── 04_evaluate.py ✅ 指标计算 ├── 04_evaluate.py ✅ 指标计算
├── 05_schedule.py ✅ 日历排程 ├── 05_schedule.py ✅ 日历排程
├── 06_validate.py 约束验证 ├── 06_validate.py 约束验证
├── 07_backtest.py 历史回测 ├── 07_backtest.py 历史回测
├── 08_sensitivity.py 敏感性分析 ├── 08_sensitivity.py 敏感性分析
├── 09_visualize.py 可视化 ├── 09_visualize.py 可视化
├── 01_clean.xlsx ├── 01_clean.xlsx
├── 02_demand.xlsx ├── 02_demand.xlsx
├── 03_allocate.xlsx ├── 03_allocate.xlsx
├── 04_metrics.xlsx ├── 04_metrics.xlsx
├── 05_schedule.xlsx ├── 05_schedule.xlsx
└── figures/ ⏳ 图表输出目录 ├── 06_validate.xlsx
├── 07_backtest.xlsx
├── 08_sensitivity.xlsx
└── figures/ ✅ 7张图表
``` ```
### 9.2 运行命令 ### 9.2 运行命令
```bash ```bash
# 核心流程(已完成) # 完整流程(全部已完成)
python task1/01_clean.py python task1/01_clean.py
python task1/02_demand_correction.py python task1/02_demand_correction.py
python task1/03_allocate.py python task1/03_allocate.py
python task1/04_evaluate.py python task1/04_evaluate.py
python task1/05_schedule.py python task1/05_schedule.py
# 验证与分析(待完成)
python task1/06_validate.py python task1/06_validate.py
python task1/07_backtest.py python task1/07_backtest.py
python task1/08_sensitivity.py python task1/08_sensitivity.py

Binary file not shown.

After

Width:  |  Height:  |  Size: 148 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 73 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 106 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 119 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 60 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 83 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 192 KiB