This commit is contained in:
2026-01-19 11:57:19 +08:00
parent c6e22729c7
commit ffc51b2927
13 changed files with 1542 additions and 0 deletions

119
task3/01_distance.py Normal file
View File

@@ -0,0 +1,119 @@
"""
Task 3 - Step 1: 距离矩阵计算
=============================
输入: task1/03_allocate.xlsx (70个站点的坐标信息)
输出: task3/01_distance.xlsx (70×70距离矩阵 + 站点信息)
距离计算公式 (Haversine简化):
l_ij = 69.0 * sqrt((lat_i - lat_j)^2 + cos^2(lat_avg * pi/180) * (lon_i - lon_j)^2)
单位: 英里
"""
import pandas as pd
import numpy as np
# ============================================
# 参数设置
# ============================================
INPUT_FILE = '../task1/03_allocate.xlsx'
OUTPUT_FILE = '01_distance.xlsx'
# ============================================
# 读取数据
# ============================================
print("=" * 60)
print("Task 3 - Step 1: 距离矩阵计算")
print("=" * 60)
df = pd.read_excel(INPUT_FILE)
print(f"\n读取站点数据: {len(df)} 个站点")
print(f"列名: {df.columns.tolist()}")
# 提取关键列
sites = df[['site_id', 'site_name', 'lat', 'lon', 'mu', 'sigma', 'mu_tilde', 'k']].copy()
print(f"\n站点数据概览:")
print(sites.head())
# ============================================
# 距离计算函数
# ============================================
def calc_distance(lat1, lon1, lat2, lon2):
"""
计算两点间的近似距离(英里)
使用Haversine公式的简化版本适用于小范围地域
"""
lat_avg = (lat1 + lat2) / 2
lat_avg_rad = np.radians(lat_avg)
delta_lat = lat1 - lat2
delta_lon = lon1 - lon2
# 69.0 miles per degree of latitude
# cos(lat) correction for longitude
distance = 69.0 * np.sqrt(delta_lat**2 + (np.cos(lat_avg_rad) * delta_lon)**2)
return distance
# ============================================
# 构建距离矩阵
# ============================================
n = len(sites)
distance_matrix = np.zeros((n, n))
for i in range(n):
for j in range(n):
if i != j:
distance_matrix[i, j] = calc_distance(
sites.iloc[i]['lat'], sites.iloc[i]['lon'],
sites.iloc[j]['lat'], sites.iloc[j]['lon']
)
# 转换为DataFrame
site_ids = sites['site_id'].values
df_distance = pd.DataFrame(distance_matrix, index=site_ids, columns=site_ids)
# ============================================
# 统计信息
# ============================================
# 提取上三角(排除对角线)
upper_tri = distance_matrix[np.triu_indices(n, k=1)]
print(f"\n距离矩阵统计:")
print(f" - 站点对总数: {len(upper_tri)}")
print(f" - 最小距离: {upper_tri.min():.2f} 英里")
print(f" - 最大距离: {upper_tri.max():.2f} 英里")
print(f" - 平均距离: {upper_tri.mean():.2f} 英里")
print(f" - 中位数距离: {np.median(upper_tri):.2f} 英里")
# 按阈值统计
thresholds = [30, 40, 50, 60, 70]
print(f"\n距离阈值统计:")
for th in thresholds:
count = np.sum(upper_tri <= th)
print(f" - ≤ {th} 英里: {count} 对 ({count/len(upper_tri)*100:.1f}%)")
# ============================================
# 保存结果
# ============================================
with pd.ExcelWriter(OUTPUT_FILE, engine='openpyxl') as writer:
# Sheet 1: 站点信息
sites.to_excel(writer, sheet_name='sites', index=False)
# Sheet 2: 距离矩阵
df_distance.to_excel(writer, sheet_name='distance_matrix')
# Sheet 3: 距离统计
stats = pd.DataFrame({
'metric': ['min', 'max', 'mean', 'median', 'std', 'total_pairs'],
'value': [upper_tri.min(), upper_tri.max(), upper_tri.mean(),
np.median(upper_tri), upper_tri.std(), len(upper_tri)]
})
stats.to_excel(writer, sheet_name='statistics', index=False)
print(f"\n结果已保存至: {OUTPUT_FILE}")
print(" - Sheet 'sites': 站点信息")
print(" - Sheet 'distance_matrix': 70×70距离矩阵")
print(" - Sheet 'statistics': 距离统计")
print("\n" + "=" * 60)

BIN
task3/01_distance.xlsx Normal file

Binary file not shown.

209
task3/02_pairing.py Normal file
View File

@@ -0,0 +1,209 @@
"""
Task 3 - Step 2: 共生站点配对筛选与选择
=======================================
输入: 01_distance.xlsx (距离矩阵 + 站点信息)
输出: 02_pairing.xlsx (筛选后的配对列表 + 最终选择)
配对筛选条件:
1. 距离约束: l_ij ≤ l_max (50英里)
2. 容量约束: μ_i + μ_j ≤ 450
3. 稳定性约束: CV_i ≤ 0.5 且 CV_j ≤ 0.5
配对价值函数:
V_ij = α * (μ_i + μ_j) / Q - β * l_ij / l_max - γ * (σ_i² + σ_j²) / (μ_i + μ_j)²
配对选择: 贪心算法,每个站点最多配对一次
"""
import pandas as pd
import numpy as np
# ============================================
# 参数设置
# ============================================
INPUT_FILE = '01_distance.xlsx'
OUTPUT_FILE = '02_pairing.xlsx'
# 约束参数
L_MAX = 50 # 距离阈值 (英里)
MU_SUM_MAX = 450 # 需求和上限
CV_MAX = 0.5 # 变异系数上限
Q = 400 # 卡车容量
# 价值函数权重
ALPHA = 1.0 # 容量利用率权重
BETA = 0.3 # 距离惩罚权重
GAMMA = 0.5 # 风险惩罚权重
# ============================================
# 读取数据
# ============================================
print("=" * 60)
print("Task 3 - Step 2: 共生站点配对筛选与选择")
print("=" * 60)
# 读取站点信息
sites = pd.read_excel(INPUT_FILE, sheet_name='sites')
print(f"\n读取站点数据: {len(sites)} 个站点")
# 读取距离矩阵
dist_matrix = pd.read_excel(INPUT_FILE, sheet_name='distance_matrix', index_col=0)
print(f"读取距离矩阵: {dist_matrix.shape}")
# 计算变异系数
sites['cv'] = sites['sigma'] / sites['mu']
print(f"\nCV范围: [{sites['cv'].min():.3f}, {sites['cv'].max():.3f}]")
print(f"CV > {CV_MAX} 的站点数: {(sites['cv'] > CV_MAX).sum()}")
# ============================================
# 配对筛选
# ============================================
print(f"\n" + "-" * 40)
print("配对筛选")
print("-" * 40)
candidates = []
n = len(sites)
for i in range(n):
for j in range(i + 1, n): # 只考虑上三角
site_i = sites.iloc[i]
site_j = sites.iloc[j]
# 获取距离
dist = dist_matrix.iloc[i, j]
# 约束检查
# 1. 距离约束
if dist > L_MAX:
continue
# 2. 容量约束
mu_sum = site_i['mu'] + site_j['mu']
if mu_sum > MU_SUM_MAX:
continue
# 3. 稳定性约束
if site_i['cv'] > CV_MAX or site_j['cv'] > CV_MAX:
continue
# 通过所有约束,计算配对价值
sigma_sq_sum = site_i['sigma']**2 + site_j['sigma']**2
# 价值函数
value = (ALPHA * mu_sum / Q
- BETA * dist / L_MAX
- GAMMA * sigma_sq_sum / mu_sum**2)
candidates.append({
'site_i_id': site_i['site_id'],
'site_j_id': site_j['site_id'],
'site_i_name': site_i['site_name'],
'site_j_name': site_j['site_name'],
'distance': dist,
'mu_i': site_i['mu'],
'mu_j': site_j['mu'],
'mu_sum': mu_sum,
'sigma_i': site_i['sigma'],
'sigma_j': site_j['sigma'],
'cv_i': site_i['cv'],
'cv_j': site_j['cv'],
'k_i': site_i['k'],
'k_j': site_j['k'],
'value': value
})
df_candidates = pd.DataFrame(candidates)
print(f"通过约束的候选配对数: {len(df_candidates)}")
if len(df_candidates) > 0:
print(f"配对价值范围: [{df_candidates['value'].min():.3f}, {df_candidates['value'].max():.3f}]")
print(f"平均距离: {df_candidates['distance'].mean():.2f} 英里")
print(f"平均需求和: {df_candidates['mu_sum'].mean():.1f}")
# ============================================
# 贪心配对选择
# ============================================
print(f"\n" + "-" * 40)
print("贪心配对选择")
print("-" * 40)
# 按价值降序排序
df_candidates_sorted = df_candidates.sort_values('value', ascending=False).reset_index(drop=True)
# 贪心选择
selected_pairs = []
used_sites = set()
for _, row in df_candidates_sorted.iterrows():
site_i = row['site_i_id']
site_j = row['site_j_id']
# 检查是否已被使用
if site_i not in used_sites and site_j not in used_sites:
selected_pairs.append(row.to_dict())
used_sites.add(site_i)
used_sites.add(site_j)
df_selected = pd.DataFrame(selected_pairs)
print(f"最终选择配对数: {len(df_selected)}")
print(f"涉及站点数: {len(used_sites)} (占总站点 {len(used_sites)/n*100:.1f}%)")
print(f"未配对站点数: {n - len(used_sites)}")
if len(df_selected) > 0:
print(f"\n选中配对的价值范围: [{df_selected['value'].min():.3f}, {df_selected['value'].max():.3f}]")
print(f"选中配对的距离范围: [{df_selected['distance'].min():.2f}, {df_selected['distance'].max():.2f}] 英里")
print(f"选中配对的需求和范围: [{df_selected['mu_sum'].min():.1f}, {df_selected['mu_sum'].max():.1f}]")
# ============================================
# 显示选中的配对
# ============================================
print(f"\n" + "-" * 40)
print("选中的配对列表")
print("-" * 40)
if len(df_selected) > 0:
display_cols = ['site_i_name', 'site_j_name', 'distance', 'mu_sum', 'k_i', 'k_j', 'value']
print(df_selected[display_cols].to_string(index=False))
# ============================================
# 保存结果
# ============================================
with pd.ExcelWriter(OUTPUT_FILE, engine='openpyxl') as writer:
# Sheet 1: 站点信息含CV
sites.to_excel(writer, sheet_name='sites', index=False)
# Sheet 2: 所有候选配对
df_candidates_sorted.to_excel(writer, sheet_name='all_candidates', index=False)
# Sheet 3: 最终选择的配对
df_selected.to_excel(writer, sheet_name='selected_pairs', index=False)
# Sheet 4: 参数记录
params = pd.DataFrame({
'parameter': ['L_MAX', 'MU_SUM_MAX', 'CV_MAX', 'Q', 'ALPHA', 'BETA', 'GAMMA'],
'value': [L_MAX, MU_SUM_MAX, CV_MAX, Q, ALPHA, BETA, GAMMA],
'description': ['距离阈值(英里)', '需求和上限', 'CV上限', '卡车容量',
'容量利用率权重', '距离惩罚权重', '风险惩罚权重']
})
params.to_excel(writer, sheet_name='parameters', index=False)
# Sheet 5: 汇总统计
summary = pd.DataFrame({
'metric': ['total_sites', 'candidate_pairs', 'selected_pairs',
'paired_sites', 'unpaired_sites', 'avg_distance', 'avg_mu_sum'],
'value': [n, len(df_candidates), len(df_selected),
len(used_sites), n - len(used_sites),
df_selected['distance'].mean() if len(df_selected) > 0 else 0,
df_selected['mu_sum'].mean() if len(df_selected) > 0 else 0]
})
summary.to_excel(writer, sheet_name='summary', index=False)
print(f"\n结果已保存至: {OUTPUT_FILE}")
print(" - Sheet 'sites': 站点信息含CV")
print(" - Sheet 'all_candidates': 所有候选配对")
print(" - Sheet 'selected_pairs': 最终选择的配对")
print(" - Sheet 'parameters': 参数记录")
print(" - Sheet 'summary': 汇总统计")
print("\n" + "=" * 60)

BIN
task3/02_pairing.xlsx Normal file

Binary file not shown.

220
task3/03_allocation.py Normal file
View File

@@ -0,0 +1,220 @@
"""
Task 3 - Step 3: 第一站点最优分配计算
=====================================
输入: 02_pairing.xlsx (选中的配对列表)
输出: 03_allocation.xlsx (含最优分配量q*的配对列表)
最优分配公式:
q* = (σ_j * μ_i + σ_i * (Q - μ_j)) / (σ_i + σ_j)
鲁棒性约束:
q_final = clip(q*, μ_i - σ_i, Q - μ_j + σ_j)
物理意义:
- 波动大的站点需要更多"缓冲"
- 当σ_j >> σ_i时q* → μ_i精确分配给站点i
- 当σ_i >> σ_j时q* → Q - μ_j为站点j预留更多
"""
import pandas as pd
import numpy as np
from scipy import stats
# ============================================
# 参数设置
# ============================================
INPUT_FILE = '02_pairing.xlsx'
OUTPUT_FILE = '03_allocation.xlsx'
Q = 400 # 卡车容量
K_ROBUST = 1 # 鲁棒性水平 (84%保护)
# ============================================
# 读取数据
# ============================================
print("=" * 60)
print("Task 3 - Step 3: 第一站点最优分配计算")
print("=" * 60)
# 读取选中的配对
df_pairs = pd.read_excel(INPUT_FILE, sheet_name='selected_pairs')
print(f"\n读取配对数据: {len(df_pairs)}")
# ============================================
# 计算最优分配
# ============================================
print(f"\n" + "-" * 40)
print("计算最优分配 q*")
print("-" * 40)
def optimal_allocation(mu_i, sigma_i, mu_j, sigma_j, Q=400):
"""
计算双站点访问时第一站点的最优分配量
公式: q* = (σ_j * μ_i + σ_i * (Q - μ_j)) / (σ_i + σ_j)
"""
if sigma_i + sigma_j == 0:
# 边界情况:两站点都无波动
return mu_i
q_star = (sigma_j * mu_i + sigma_i * (Q - mu_j)) / (sigma_i + sigma_j)
return q_star
def robust_allocation(q_star, mu_i, sigma_i, mu_j, sigma_j, Q=400, k=1):
"""
应用鲁棒性约束
约束: μ_i - k*σ_i ≤ q ≤ Q - μ_j + k*σ_j
"""
q_lower = max(0, mu_i - k * sigma_i)
q_upper = min(Q, Q - mu_j + k * sigma_j)
# 确保上下界合理
if q_lower > q_upper:
# 如果约束冲突,取中点
q_final = (q_lower + q_upper) / 2
else:
q_final = np.clip(q_star, q_lower, q_upper)
return q_final, q_lower, q_upper
def expected_service(q, mu, sigma):
"""
计算截断期望 E[min(D, q)]
E[min(D, q)] = μ * Φ(z) - σ * φ(z) + q * (1 - Φ(z))
where z = (q - μ) / σ
当q > μ时大部分时候D < q期望接近μ
当q < μ时经常D > q期望接近q但略低
"""
if sigma == 0:
return min(mu, q)
z = (q - mu) / sigma
phi_z = stats.norm.pdf(z)
Phi_z = stats.norm.cdf(z)
return mu * Phi_z - sigma * phi_z + q * (1 - Phi_z)
# 计算每个配对的分配
results = []
for idx, row in df_pairs.iterrows():
mu_i = row['mu_i']
sigma_i = row['sigma_i']
mu_j = row['mu_j']
sigma_j = row['sigma_j']
# 计算最优分配
q_star = optimal_allocation(mu_i, sigma_i, mu_j, sigma_j, Q)
# 应用鲁棒性约束
q_final, q_lower, q_upper = robust_allocation(
q_star, mu_i, sigma_i, mu_j, sigma_j, Q, K_ROBUST
)
# 计算期望服务量
E_Si = expected_service(q_final, mu_i, sigma_i)
E_Sj = expected_service(Q - q_final, mu_j, sigma_j)
E_total = E_Si + E_Sj
# 计算分配比例
alloc_ratio = q_final / Q if Q > 0 else 0
results.append({
**row.to_dict(),
'q_star': q_star,
'q_lower': q_lower,
'q_upper': q_upper,
'q_final': q_final,
'q_ratio': alloc_ratio,
'E_Si': E_Si,
'E_Sj': E_Sj,
'E_total': E_total,
'efficiency': E_total / (mu_i + mu_j) if (mu_i + mu_j) > 0 else 0
})
df_allocation = pd.DataFrame(results)
# ============================================
# 统计信息
# ============================================
print(f"\n分配统计:")
print(f" - q* 范围: [{df_allocation['q_star'].min():.1f}, {df_allocation['q_star'].max():.1f}]")
print(f" - q_final 范围: [{df_allocation['q_final'].min():.1f}, {df_allocation['q_final'].max():.1f}]")
print(f" - 分配比例范围: [{df_allocation['q_ratio'].min():.2%}, {df_allocation['q_ratio'].max():.2%}]")
print(f" - 平均分配比例: {df_allocation['q_ratio'].mean():.2%}")
print(f"\n期望服务量统计:")
print(f" - E[S_i] 范围: [{df_allocation['E_Si'].min():.1f}, {df_allocation['E_Si'].max():.1f}]")
print(f" - E[S_j] 范围: [{df_allocation['E_Sj'].min():.1f}, {df_allocation['E_Sj'].max():.1f}]")
print(f" - E[total] 范围: [{df_allocation['E_total'].min():.1f}, {df_allocation['E_total'].max():.1f}]")
print(f" - 平均效率: {df_allocation['efficiency'].mean():.2%}")
# 检查是否有被约束裁剪的情况
clipped_lower = (df_allocation['q_final'] <= df_allocation['q_lower'] + 0.01).sum()
clipped_upper = (df_allocation['q_final'] >= df_allocation['q_upper'] - 0.01).sum()
print(f"\n鲁棒性约束影响:")
print(f" - 触及下界: {clipped_lower}")
print(f" - 触及上界: {clipped_upper}")
print(f" - 未被裁剪: {len(df_allocation) - clipped_lower - clipped_upper}")
# ============================================
# 显示关键配对的分配
# ============================================
print(f"\n" + "-" * 40)
print("高价值配对的分配方案 (Top 10)")
print("-" * 40)
display_cols = ['site_i_name', 'site_j_name', 'mu_i', 'mu_j', 'q_final', 'E_Si', 'E_Sj', 'E_total']
df_top = df_allocation.nlargest(10, 'value')[display_cols].copy()
df_top['site_i_name'] = df_top['site_i_name'].str[:25]
df_top['site_j_name'] = df_top['site_j_name'].str[:25]
print(df_top.to_string(index=False))
# ============================================
# 保存结果
# ============================================
with pd.ExcelWriter(OUTPUT_FILE, engine='openpyxl') as writer:
# Sheet 1: 完整分配结果
df_allocation.to_excel(writer, sheet_name='allocation', index=False)
# Sheet 2: 简化视图
simple_cols = ['site_i_id', 'site_j_id', 'site_i_name', 'site_j_name',
'mu_i', 'mu_j', 'sigma_i', 'sigma_j', 'k_i', 'k_j',
'q_final', 'E_Si', 'E_Sj', 'E_total', 'value']
df_allocation[simple_cols].to_excel(writer, sheet_name='allocation_simple', index=False)
# Sheet 3: 参数记录
params = pd.DataFrame({
'parameter': ['Q', 'K_ROBUST'],
'value': [Q, K_ROBUST],
'description': ['卡车容量', '鲁棒性水平(k*σ)']
})
params.to_excel(writer, sheet_name='parameters', index=False)
# Sheet 4: 汇总统计
summary = pd.DataFrame({
'metric': ['total_pairs', 'avg_q_ratio', 'avg_E_total', 'avg_efficiency',
'min_E_total', 'max_E_total', 'clipped_lower', 'clipped_upper'],
'value': [len(df_allocation),
df_allocation['q_ratio'].mean(),
df_allocation['E_total'].mean(),
df_allocation['efficiency'].mean(),
df_allocation['E_total'].min(),
df_allocation['E_total'].max(),
clipped_lower, clipped_upper]
})
summary.to_excel(writer, sheet_name='summary', index=False)
print(f"\n结果已保存至: {OUTPUT_FILE}")
print(" - Sheet 'allocation': 完整分配结果")
print(" - Sheet 'allocation_simple': 简化视图")
print(" - Sheet 'parameters': 参数记录")
print(" - Sheet 'summary': 汇总统计")
print("\n" + "=" * 60)

BIN
task3/03_allocation.xlsx Normal file

Binary file not shown.

237
task3/04_reschedule.py Normal file
View File

@@ -0,0 +1,237 @@
"""
Task 3 - Step 4: 访问次数重分配
================================
输入:
- 03_allocation.xlsx (配对及分配方案)
- ../task1/03_allocate.xlsx (原始频次分配)
输出: 04_reschedule.xlsx (重分配后的访问次数)
重分配逻辑:
1. 对于每个配对(i,j),计算双站点访问次数: k_ij = floor(min(k_i, k_j) / 2)
2. 更新单独访问次数: k'_i = k_i - k_ij, k'_j = k_j - k_ij
3. 释放的槽位: ΔN = Σ k_ij
4. 将ΔN按需求比例分配给所有站点
约束:
- 双站点访问算1次"访问事件"
- 每天仍然2次访问事件单站点或双站点
- 总访问事件数 = 730
"""
import pandas as pd
import numpy as np
# ============================================
# 参数设置
# ============================================
ALLOCATION_FILE = '03_allocation.xlsx'
TASK1_FILE = '../task1/03_allocate.xlsx'
OUTPUT_FILE = '04_reschedule.xlsx'
MERGE_RATIO = 0.5 # 合并比例: min(k_i, k_j) * ratio
TOTAL_EVENTS = 730 # 年度总访问事件数
# ============================================
# 读取数据
# ============================================
print("=" * 60)
print("Task 3 - Step 4: 访问次数重分配")
print("=" * 60)
# 读取配对数据
df_pairs = pd.read_excel(ALLOCATION_FILE, sheet_name='allocation')
print(f"\n读取配对数据: {len(df_pairs)}")
# 读取原始分配
df_original = pd.read_excel(TASK1_FILE)
print(f"读取原始分配: {len(df_original)} 个站点")
print(f"原始总访问次数: {df_original['k'].sum()}")
# ============================================
# 计算双站点访问次数
# ============================================
print(f"\n" + "-" * 40)
print("计算双站点访问次数")
print("-" * 40)
# 创建站点访问信息的副本
sites = df_original[['site_id', 'site_name', 'mu', 'sigma', 'mu_tilde', 'k']].copy()
sites['k_original'] = sites['k']
sites['k_single'] = sites['k'] # 将被更新
sites['k_dual'] = 0 # 作为配对中的一员参与双站点访问的次数
sites['is_paired'] = False
sites['pair_partner'] = None
# 记录配对信息
pair_visits = []
paired_sites = set()
for idx, row in df_pairs.iterrows():
site_i = row['site_i_id']
site_j = row['site_j_id']
k_i = row['k_i']
k_j = row['k_j']
# 计算双站点访问次数
k_ij = int(min(k_i, k_j) * MERGE_RATIO)
# 确保至少保留1次单独访问
if k_ij >= min(k_i, k_j):
k_ij = min(k_i, k_j) - 1
if k_ij < 1:
k_ij = 0 # 如果无法合并,跳过
# 更新单独访问次数
k_i_single = k_i - k_ij
k_j_single = k_j - k_ij
# 更新sites表
sites.loc[sites['site_id'] == site_i, 'k_single'] = k_i_single
sites.loc[sites['site_id'] == site_i, 'k_dual'] = k_ij
sites.loc[sites['site_id'] == site_i, 'is_paired'] = True
sites.loc[sites['site_id'] == site_i, 'pair_partner'] = site_j
sites.loc[sites['site_id'] == site_j, 'k_single'] = k_j_single
sites.loc[sites['site_id'] == site_j, 'k_dual'] = k_ij
sites.loc[sites['site_id'] == site_j, 'is_paired'] = True
sites.loc[sites['site_id'] == site_j, 'pair_partner'] = site_i
paired_sites.add(site_i)
paired_sites.add(site_j)
pair_visits.append({
'pair_id': idx + 1,
'site_i_id': site_i,
'site_j_id': site_j,
'site_i_name': row['site_i_name'],
'site_j_name': row['site_j_name'],
'k_i_original': k_i,
'k_j_original': k_j,
'k_dual': k_ij,
'k_i_single': k_i_single,
'k_j_single': k_j_single,
'q_final': row['q_final'],
'E_total': row['E_total']
})
df_pair_visits = pd.DataFrame(pair_visits)
# ============================================
# 计算释放的槽位和重分配
# ============================================
print(f"\n" + "-" * 40)
print("计算释放槽位")
print("-" * 40)
# 当前访问事件统计
total_single = sites['k_single'].sum()
total_dual = df_pair_visits['k_dual'].sum()
total_events_current = total_single + total_dual
print(f"单站点访问次数: {total_single}")
print(f"双站点访问次数: {total_dual}")
print(f"当前总访问事件: {total_events_current}")
# 需要填补的槽位
delta_N = TOTAL_EVENTS - total_events_current
print(f"需要填补的槽位: {delta_N}")
# 按需求比例重分配
if delta_N > 0:
print(f"\n重分配 {delta_N} 次额外访问...")
# 计算每个站点的需求权重
total_demand = sites['mu_tilde'].sum()
sites['demand_weight'] = sites['mu_tilde'] / total_demand
# 按比例分配使用Hamilton方法
sites['k_extra_float'] = delta_N * sites['demand_weight']
sites['k_extra'] = sites['k_extra_float'].apply(np.floor).astype(int)
# 处理余数
remainder = delta_N - sites['k_extra'].sum()
if remainder > 0:
# 按小数部分降序分配余数
fractional = sites['k_extra_float'] - sites['k_extra']
top_indices = fractional.nlargest(int(remainder)).index
sites.loc[top_indices, 'k_extra'] += 1
# 更新最终单站点访问次数
sites['k_single_final'] = sites['k_single'] + sites['k_extra']
else:
sites['k_extra'] = 0
sites['k_single_final'] = sites['k_single']
# ============================================
# 验证和统计
# ============================================
print(f"\n" + "-" * 40)
print("验证和统计")
print("-" * 40)
# 最终统计
final_single = sites['k_single_final'].sum()
final_dual = df_pair_visits['k_dual'].sum()
final_total = final_single + final_dual
print(f"最终单站点访问: {final_single}")
print(f"最终双站点访问: {final_dual}")
print(f"最终总访问事件: {final_total}")
print(f"目标访问事件: {TOTAL_EVENTS}")
print(f"差异: {final_total - TOTAL_EVENTS}")
# 站点级别统计
print(f"\n站点访问次数变化:")
sites['k_total_final'] = sites['k_single_final'] + sites['k_dual']
print(f" - 原始k范围: [{sites['k_original'].min()}, {sites['k_original'].max()}]")
print(f" - 最终k范围: [{sites['k_total_final'].min()}, {sites['k_total_final'].max()}]")
print(f" - 额外分配范围: [{sites['k_extra'].min()}, {sites['k_extra'].max()}]")
# 配对统计
if len(df_pair_visits) > 0:
print(f"\n配对访问统计:")
print(f" - 配对数: {len(df_pair_visits)}")
print(f" - 双站点访问总次数: {df_pair_visits['k_dual'].sum()}")
print(f" - 每对双站点访问范围: [{df_pair_visits['k_dual'].min()}, {df_pair_visits['k_dual'].max()}]")
# ============================================
# 保存结果
# ============================================
with pd.ExcelWriter(OUTPUT_FILE, engine='openpyxl') as writer:
# Sheet 1: 站点最终访问次数
sites_output = sites[['site_id', 'site_name', 'mu', 'sigma', 'mu_tilde',
'k_original', 'k_single', 'k_extra', 'k_single_final',
'k_dual', 'k_total_final', 'is_paired', 'pair_partner']]
sites_output.to_excel(writer, sheet_name='sites_schedule', index=False)
# Sheet 2: 配对访问明细
df_pair_visits.to_excel(writer, sheet_name='pair_visits', index=False)
# Sheet 3: 参数记录
params = pd.DataFrame({
'parameter': ['MERGE_RATIO', 'TOTAL_EVENTS', 'delta_N'],
'value': [MERGE_RATIO, TOTAL_EVENTS, delta_N],
'description': ['合并比例', '年度总访问事件', '额外分配次数']
})
params.to_excel(writer, sheet_name='parameters', index=False)
# Sheet 4: 汇总统计
summary = pd.DataFrame({
'metric': ['total_sites', 'paired_sites', 'unpaired_sites',
'total_pairs', 'total_dual_visits', 'total_single_visits',
'total_events', 'original_total_visits'],
'value': [len(sites), len(paired_sites), len(sites) - len(paired_sites),
len(df_pair_visits), final_dual, final_single,
final_total, sites['k_original'].sum()]
})
summary.to_excel(writer, sheet_name='summary', index=False)
print(f"\n结果已保存至: {OUTPUT_FILE}")
print(" - Sheet 'sites_schedule': 站点最终访问次数")
print(" - Sheet 'pair_visits': 配对访问明细")
print(" - Sheet 'parameters': 参数记录")
print(" - Sheet 'summary': 汇总统计")
print("\n" + "=" * 60)

BIN
task3/04_reschedule.xlsx Normal file

Binary file not shown.

332
task3/05_calendar.py Normal file
View File

@@ -0,0 +1,332 @@
"""
Task 3 - Step 5: 日历排程生成
=============================
输入:
- 04_reschedule.xlsx (站点访问次数 + 配对访问明细)
输出: 05_calendar.xlsx (365天的完整排程)
排程逻辑:
1. 生成所有访问事件(单站点 + 双站点)
2. 为每个事件计算理想日期(均匀分布)
3. 贪心分配到365天每天2个事件槽位
4. 优先安排双站点访问(时间较长)
约束:
- 每天恰好2个访问事件
- 同一站点的访问尽量均匀分布
"""
import pandas as pd
import numpy as np
from collections import defaultdict
# ============================================
# 参数设置
# ============================================
INPUT_FILE = '04_reschedule.xlsx'
OUTPUT_FILE = '05_calendar.xlsx'
DAYS_PER_YEAR = 365
EVENTS_PER_DAY = 2
TOTAL_EVENTS = DAYS_PER_YEAR * EVENTS_PER_DAY # 730
# ============================================
# 读取数据
# ============================================
print("=" * 60)
print("Task 3 - Step 5: 日历排程生成")
print("=" * 60)
# 读取站点访问次数
df_sites = pd.read_excel(INPUT_FILE, sheet_name='sites_schedule')
print(f"\n读取站点数据: {len(df_sites)} 个站点")
# 读取配对访问
df_pairs = pd.read_excel(INPUT_FILE, sheet_name='pair_visits')
print(f"读取配对数据: {len(df_pairs)}")
# ============================================
# 生成访问事件
# ============================================
print(f"\n" + "-" * 40)
print("生成访问事件")
print("-" * 40)
events = []
event_id = 0
# 1. 生成单站点访问事件
for _, row in df_sites.iterrows():
site_id = row['site_id']
site_name = row['site_name']
k_single = int(row['k_single_final'])
for visit_num in range(k_single):
# 计算理想日期(均匀分布)
ideal_day = (visit_num + 0.5) * DAYS_PER_YEAR / k_single
events.append({
'event_id': event_id,
'event_type': 'single',
'site_i_id': site_id,
'site_j_id': None,
'site_i_name': site_name,
'site_j_name': None,
'visit_num': visit_num + 1,
'total_visits': k_single,
'ideal_day': ideal_day,
'priority': 0 # 单站点优先级较低
})
event_id += 1
# 2. 生成双站点访问事件
for _, row in df_pairs.iterrows():
site_i_id = row['site_i_id']
site_j_id = row['site_j_id']
site_i_name = row['site_i_name']
site_j_name = row['site_j_name']
k_dual = int(row['k_dual'])
for visit_num in range(k_dual):
ideal_day = (visit_num + 0.5) * DAYS_PER_YEAR / k_dual
events.append({
'event_id': event_id,
'event_type': 'dual',
'site_i_id': site_i_id,
'site_j_id': site_j_id,
'site_i_name': site_i_name,
'site_j_name': site_j_name,
'visit_num': visit_num + 1,
'total_visits': k_dual,
'ideal_day': ideal_day,
'priority': 1 # 双站点优先级较高
})
event_id += 1
df_events = pd.DataFrame(events)
print(f"总访问事件数: {len(df_events)}")
print(f" - 单站点: {(df_events['event_type'] == 'single').sum()}")
print(f" - 双站点: {(df_events['event_type'] == 'dual').sum()}")
# ============================================
# 贪心排程算法
# ============================================
print(f"\n" + "-" * 40)
print("执行贪心排程")
print("-" * 40)
# 按理想日期和优先级排序
df_events_sorted = df_events.sort_values(
['ideal_day', 'priority'],
ascending=[True, False]
).reset_index(drop=True)
# 初始化日历槽位
calendar = {day: [] for day in range(1, DAYS_PER_YEAR + 1)}
# 记录每个站点最后访问的日期
last_visit = defaultdict(lambda: -float('inf'))
# 贪心分配
assigned_day = []
for idx, event in df_events_sorted.iterrows():
ideal = event['ideal_day']
site_i = event['site_i_id']
site_j = event['site_j_id']
# 寻找最佳可用日期
best_day = None
best_score = float('inf')
# 搜索范围:理想日期附近
search_start = max(1, int(ideal) - 30)
search_end = min(DAYS_PER_YEAR, int(ideal) + 30)
for day in range(search_start, search_end + 1):
# 检查槽位是否可用
if len(calendar[day]) >= EVENTS_PER_DAY:
continue
# 检查同一站点是否已在当天访问
sites_on_day = set()
for e in calendar[day]:
sites_on_day.add(e['site_i_id'])
if e['site_j_id'] is not None:
sites_on_day.add(e['site_j_id'])
if site_i in sites_on_day:
continue
if site_j is not None and site_j in sites_on_day:
continue
# 计算得分(理想日期偏差 + 间隔惩罚)
day_diff = abs(day - ideal)
# 间隔惩罚:鼓励与上次访问保持距离
min_gap = min(
day - last_visit[site_i],
day - last_visit[site_j] if site_j is not None else float('inf')
)
gap_penalty = max(0, 7 - min_gap) * 2 # 7天内再次访问有惩罚
score = day_diff + gap_penalty
if score < best_score:
best_score = score
best_day = day
# 如果附近没找到,扩大搜索范围
if best_day is None:
for day in range(1, DAYS_PER_YEAR + 1):
if len(calendar[day]) < EVENTS_PER_DAY:
sites_on_day = set()
for e in calendar[day]:
sites_on_day.add(e['site_i_id'])
if e['site_j_id'] is not None:
sites_on_day.add(e['site_j_id'])
if site_i not in sites_on_day:
if site_j is None or site_j not in sites_on_day:
best_day = day
break
if best_day is None:
print(f"警告: 无法分配事件 {event['event_id']}")
best_day = 1 # 强制分配
# 分配到日历
calendar[best_day].append(event.to_dict())
assigned_day.append(best_day)
# 更新最后访问日期
last_visit[site_i] = best_day
if site_j is not None:
last_visit[site_j] = best_day
df_events_sorted['assigned_day'] = assigned_day
# ============================================
# 生成日历视图
# ============================================
print(f"\n" + "-" * 40)
print("生成日历视图")
print("-" * 40)
calendar_rows = []
for day in range(1, DAYS_PER_YEAR + 1):
events_on_day = calendar[day]
row = {'day': day}
for slot in range(EVENTS_PER_DAY):
if slot < len(events_on_day):
e = events_on_day[slot]
row[f'slot_{slot+1}_type'] = e['event_type']
row[f'slot_{slot+1}_site_i'] = e['site_i_id']
row[f'slot_{slot+1}_site_j'] = e['site_j_id']
row[f'slot_{slot+1}_name_i'] = e['site_i_name']
row[f'slot_{slot+1}_name_j'] = e['site_j_name']
else:
row[f'slot_{slot+1}_type'] = None
row[f'slot_{slot+1}_site_i'] = None
row[f'slot_{slot+1}_site_j'] = None
row[f'slot_{slot+1}_name_i'] = None
row[f'slot_{slot+1}_name_j'] = None
calendar_rows.append(row)
df_calendar = pd.DataFrame(calendar_rows)
# ============================================
# 统计和验证
# ============================================
print(f"\n排程统计:")
# 每天事件数
events_per_day = [len(calendar[d]) for d in range(1, DAYS_PER_YEAR + 1)]
print(f" - 每天事件数: min={min(events_per_day)}, max={max(events_per_day)}, avg={np.mean(events_per_day):.2f}")
# 理想日期偏差
df_events_sorted['day_diff'] = abs(df_events_sorted['assigned_day'] - df_events_sorted['ideal_day'])
print(f" - 理想日期偏差: avg={df_events_sorted['day_diff'].mean():.2f}, max={df_events_sorted['day_diff'].max():.0f}")
# 访问间隔分析
print(f"\n访问间隔分析:")
site_visits = defaultdict(list)
for _, event in df_events_sorted.iterrows():
site_visits[event['site_i_id']].append(event['assigned_day'])
# 只有双站点访问才有site_j_id
if event['site_j_id'] is not None and not (isinstance(event['site_j_id'], float) and np.isnan(event['site_j_id'])):
site_visits[event['site_j_id']].append(event['assigned_day'])
gaps = []
for site_id, days in site_visits.items():
days_sorted = sorted(days)
for i in range(1, len(days_sorted)):
gaps.append(days_sorted[i] - days_sorted[i-1])
if gaps:
print(f" - 间隔范围: [{min(gaps)}, {max(gaps)}] 天")
print(f" - 平均间隔: {np.mean(gaps):.1f}")
print(f" - 中位数间隔: {np.median(gaps):.1f}")
# ============================================
# 保存结果
# ============================================
with pd.ExcelWriter(OUTPUT_FILE, engine='openpyxl') as writer:
# Sheet 1: 日历视图
df_calendar.to_excel(writer, sheet_name='calendar', index=False)
# Sheet 2: 事件详情(含分配日期)
df_events_sorted.to_excel(writer, sheet_name='events', index=False)
# Sheet 3: 站点访问日期汇总
site_schedule = []
for site_id, days in site_visits.items():
# 跳过None/NaN值
if site_id is None or (isinstance(site_id, float) and np.isnan(site_id)):
continue
# 处理可能的类型不匹配
site_row = df_sites[df_sites['site_id'] == int(site_id)]
if len(site_row) > 0:
site_name = site_row['site_name'].values[0]
else:
site_name = f"Site_{site_id}"
days_sorted = sorted(days)
site_schedule.append({
'site_id': int(site_id),
'site_name': site_name,
'total_visits': len(days),
'visit_days': ','.join(map(str, days_sorted)),
'first_visit': days_sorted[0],
'last_visit': days_sorted[-1]
})
df_site_schedule = pd.DataFrame(site_schedule)
df_site_schedule.to_excel(writer, sheet_name='site_visits', index=False)
# Sheet 4: 汇总统计
summary = pd.DataFrame({
'metric': ['total_days', 'total_events', 'single_events', 'dual_events',
'avg_day_diff', 'max_day_diff', 'avg_gap', 'min_gap', 'max_gap'],
'value': [DAYS_PER_YEAR, len(df_events),
(df_events['event_type'] == 'single').sum(),
(df_events['event_type'] == 'dual').sum(),
df_events_sorted['day_diff'].mean(),
df_events_sorted['day_diff'].max(),
np.mean(gaps) if gaps else 0,
min(gaps) if gaps else 0,
max(gaps) if gaps else 0]
})
summary.to_excel(writer, sheet_name='summary', index=False)
print(f"\n结果已保存至: {OUTPUT_FILE}")
print(" - Sheet 'calendar': 365天日历视图")
print(" - Sheet 'events': 事件详情")
print(" - Sheet 'site_visits': 站点访问日期汇总")
print(" - Sheet 'summary': 汇总统计")
print("\n" + "=" * 60)

BIN
task3/05_calendar.xlsx Normal file

Binary file not shown.

348
task3/06_evaluate.py Normal file
View File

@@ -0,0 +1,348 @@
"""
Task 3 - Step 6: 效果评估
=========================
输入:
- 04_reschedule.xlsx (访问次数)
- 03_allocation.xlsx (配对分配方案)
- ../task1/03_allocate.xlsx (Task 1结果用于对比)
- ../task1/04_metrics.xlsx (Task 1指标用于对比)
输出: 06_evaluate.xlsx (评估指标对比)
评估指标:
- E1': 期望总服务量
- E2': 质量加权服务量(总量计算衰减)
- F1': 满足率Gini系数
- F2': 最低满足率
- R1: 服务缺口风险
- RS: 资源节省率
有效性衰减公式(总量计算):
q(μ_i + μ_j) = min(1, 250 / (μ_i + μ_j))
"""
import pandas as pd
import numpy as np
from scipy import stats
# ============================================
# 参数设置
# ============================================
RESCHEDULE_FILE = '04_reschedule.xlsx'
ALLOCATION_FILE = '03_allocation.xlsx'
TASK1_ALLOC_FILE = '../task1/03_allocate.xlsx'
TASK1_METRIC_FILE = '../task1/04_metrics.xlsx'
OUTPUT_FILE = '06_evaluate.xlsx'
Q = 400 # 卡车容量
QUALITY_THRESHOLD = 250 # 质量衰减阈值
SHORTFALL_THRESHOLD = 0.8 # 服务缺口阈值
# ============================================
# 辅助函数
# ============================================
def quality_factor(mu_total):
"""质量折扣因子(总量计算)"""
return min(1.0, QUALITY_THRESHOLD / mu_total) if mu_total > 0 else 1.0
def expected_service(q, mu, sigma):
"""E[min(D, q)]"""
if sigma == 0:
return min(mu, q)
z = (q - mu) / sigma
return mu * stats.norm.cdf(z) - sigma * stats.norm.pdf(z) + q * (1 - stats.norm.cdf(z))
def gini_coefficient(values):
"""计算Gini系数"""
values = np.array(values)
values = values[~np.isnan(values)]
if len(values) == 0:
return 0
values = np.sort(values)
n = len(values)
cumsum = np.cumsum(values)
return (2 * np.sum((np.arange(1, n + 1) * values)) - (n + 1) * cumsum[-1]) / (n * cumsum[-1]) if cumsum[-1] > 0 else 0
def shortfall_probability(q, mu, sigma, threshold=0.8):
"""P(S/D < threshold),即服务不足的概率"""
# 简化计算P(min(D,q) / D < 0.8) ≈ P(D > q/0.8)
if sigma == 0:
return 0 if q >= mu * threshold else 1
critical_demand = q / threshold
return 1 - stats.norm.cdf((critical_demand - mu) / sigma)
# ============================================
# 读取数据
# ============================================
print("=" * 60)
print("Task 3 - Step 6: 效果评估")
print("=" * 60)
# Task 3 数据
df_sites = pd.read_excel(RESCHEDULE_FILE, sheet_name='sites_schedule')
df_pairs = pd.read_excel(ALLOCATION_FILE, sheet_name='allocation')
df_pair_visits = pd.read_excel(RESCHEDULE_FILE, sheet_name='pair_visits')
print(f"\n读取Task 3数据:")
print(f" - 站点数: {len(df_sites)}")
print(f" - 配对数: {len(df_pairs)}")
# Task 1 数据(用于对比)
df_task1 = pd.read_excel(TASK1_ALLOC_FILE)
try:
df_task1_metrics = pd.read_excel(TASK1_METRIC_FILE)
has_task1_metrics = True
except:
has_task1_metrics = False
print(" - 未找到Task 1指标文件将重新计算")
# ============================================
# 计算 Task 3 指标
# ============================================
print(f"\n" + "-" * 40)
print("计算 Task 3 指标")
print("-" * 40)
# 合并配对访问信息
pair_k = {(row['site_i_id'], row['site_j_id']): row['k_dual']
for _, row in df_pair_visits.iterrows()}
# === E1': 期望总服务量 ===
E1_prime = 0
# 单站点贡献
for _, row in df_sites.iterrows():
k_single = row['k_single_final']
mu = row['mu']
E1_prime += k_single * mu
# 双站点贡献
for _, row in df_pairs.iterrows():
pair_key = (row['site_i_id'], row['site_j_id'])
k_dual = pair_k.get(pair_key, 0)
E_total = row['E_total']
E1_prime += k_dual * E_total
print(f"E1' (期望总服务量): {E1_prime:.0f}")
# === E2': 质量加权服务量 ===
E2_prime = 0
# 单站点贡献
for _, row in df_sites.iterrows():
k_single = row['k_single_final']
mu = row['mu']
q_factor = quality_factor(mu)
E2_prime += k_single * q_factor * mu
# 双站点贡献(总量计算衰减)
for _, row in df_pairs.iterrows():
pair_key = (row['site_i_id'], row['site_j_id'])
k_dual = pair_k.get(pair_key, 0)
mu_sum = row['mu_i'] + row['mu_j']
E_total = row['E_total']
q_factor = quality_factor(mu_sum) # 总量计算
E2_prime += k_dual * q_factor * E_total
print(f"E2' (质量加权服务量): {E2_prime:.0f}")
# === 满足率计算 ===
# Task 1定义: r_i = k_i * μ_i / μ̃_i (等效服务次数)
# Task 3: 需要分别计算单站点和双站点的贡献
site_satisfaction = {}
for _, row in df_sites.iterrows():
site_id = row['site_id']
mu = row['mu']
mu_tilde = row['mu_tilde']
k_single = row['k_single_final']
# 单站点贡献: k_single * μ / μ̃
r_single = k_single * mu / mu_tilde if mu_tilde > 0 else 0
site_satisfaction[site_id] = r_single
# 加上双站点贡献
for _, row in df_pairs.iterrows():
pair_key = (row['site_i_id'], row['site_j_id'])
k_dual = pair_k.get(pair_key, 0)
# 双站点贡献: k_dual * E[S_i] / μ̃_i
mu_tilde_i = df_sites[df_sites['site_id'] == row['site_i_id']]['mu_tilde'].values[0]
mu_tilde_j = df_sites[df_sites['site_id'] == row['site_j_id']]['mu_tilde'].values[0]
r_dual_i = k_dual * row['E_Si'] / mu_tilde_i if mu_tilde_i > 0 else 0
r_dual_j = k_dual * row['E_Sj'] / mu_tilde_j if mu_tilde_j > 0 else 0
site_satisfaction[row['site_i_id']] += r_dual_i
site_satisfaction[row['site_j_id']] += r_dual_j
# 计算满足率列表
satisfaction_rates = list(site_satisfaction.values())
# === F1': Gini系数 ===
F1_prime = gini_coefficient(satisfaction_rates)
print(f"F1' (满足率Gini): {F1_prime:.4f}")
# === F2': 最低满足率 ===
F2_prime = min(satisfaction_rates) if satisfaction_rates else 0
print(f"F2' (最低满足率): {F2_prime:.4f}")
# === R1: 服务缺口风险 ===
# 双站点访问时,任一站点服务不足的概率
shortfall_probs = []
for _, row in df_pairs.iterrows():
q_final = row['q_final']
mu_i, sigma_i = row['mu_i'], row['sigma_i']
mu_j, sigma_j = row['mu_j'], row['sigma_j']
# 站点i的缺口概率
p_i = shortfall_probability(q_final, mu_i, sigma_i, SHORTFALL_THRESHOLD)
# 站点j的缺口概率
p_j = shortfall_probability(Q - q_final, mu_j, sigma_j, SHORTFALL_THRESHOLD)
# 至少一个站点缺口的概率
p_either = 1 - (1 - p_i) * (1 - p_j)
shortfall_probs.append(p_either)
R1 = np.mean(shortfall_probs) if shortfall_probs else 0
print(f"R1 (平均缺口风险): {R1:.4f}")
# === RS: 资源节省率 ===
total_dual = df_pair_visits['k_dual'].sum()
RS = total_dual / 730 * 100
print(f"RS (资源节省率): {RS:.2f}%")
# ============================================
# 计算 Task 1 指标(用于对比)
# ============================================
print(f"\n" + "-" * 40)
print("计算 Task 1 指标(对比基准)")
print("-" * 40)
# E1: 总服务量
E1_task1 = (df_task1['k'] * df_task1['mu']).sum()
print(f"E1 (Task 1 总服务量): {E1_task1:.0f}")
# E2: 质量加权
E2_task1 = 0
for _, row in df_task1.iterrows():
q_factor = quality_factor(row['mu'])
E2_task1 += row['k'] * q_factor * row['mu']
print(f"E2 (Task 1 质量加权): {E2_task1:.0f}")
# F1: Gini系数 - 使用与Task 3相同的r定义
# r_i = k_i * μ_i / μ̃_i
task1_rates = []
for _, row in df_task1.iterrows():
r = row['k'] * row['mu'] / row['mu_tilde'] if row['mu_tilde'] > 0 else 0
task1_rates.append(r)
F1_task1 = gini_coefficient(task1_rates)
print(f"F1 (Task 1 Gini): {F1_task1:.4f}")
# F2: 最低满足率
F2_task1 = min(task1_rates) if task1_rates else 0
print(f"F2 (Task 1 最低满足率): {F2_task1:.4f}")
# ============================================
# 对比分析
# ============================================
print(f"\n" + "-" * 40)
print("Task 3 vs Task 1 对比")
print("-" * 40)
comparison = pd.DataFrame({
'Metric': ['E1 (总服务量)', 'E2 (质量加权)', 'F1 (Gini系数)', 'F2 (最低满足率)'],
'Task 1': [E1_task1, E2_task1, F1_task1, F2_task1],
'Task 3': [E1_prime, E2_prime, F1_prime, F2_prime],
'Change': [E1_prime - E1_task1, E2_prime - E2_task1,
F1_prime - F1_task1, F2_prime - F2_task1],
'Change %': [(E1_prime - E1_task1) / E1_task1 * 100,
(E2_prime - E2_task1) / E2_task1 * 100,
(F1_prime - F1_task1) / F1_task1 * 100 if F1_task1 != 0 else 0,
(F2_prime - F2_task1) / F2_task1 * 100 if F2_task1 != 0 else 0]
})
print(comparison.to_string(index=False))
# ============================================
# 保存结果
# ============================================
with pd.ExcelWriter(OUTPUT_FILE, engine='openpyxl') as writer:
# Sheet 1: 指标对比
comparison.to_excel(writer, sheet_name='comparison', index=False)
# Sheet 2: Task 3 详细指标
task3_metrics = pd.DataFrame({
'metric': ['E1_prime', 'E2_prime', 'F1_prime', 'F2_prime',
'R1', 'RS', 'total_pairs', 'total_dual_visits'],
'value': [E1_prime, E2_prime, F1_prime, F2_prime,
R1, RS/100, len(df_pairs), total_dual],
'description': ['期望总服务量', '质量加权服务量', '满足率Gini系数', '最低满足率',
'平均缺口风险', '资源节省率', '配对数', '双站点访问次数']
})
task3_metrics.to_excel(writer, sheet_name='task3_metrics', index=False)
# Sheet 3: 站点级别满足率
site_rates = []
for site_id, r in site_satisfaction.items():
site_row = df_sites[df_sites['site_id'] == site_id]
if len(site_row) > 0:
site_name = site_row['site_name'].values[0]
mu = site_row['mu'].values[0]
mu_tilde = site_row['mu_tilde'].values[0]
k_single = site_row['k_single_final'].values[0]
k_dual = site_row['k_dual'].values[0]
else:
site_name = f"Site_{site_id}"
mu, mu_tilde, k_single, k_dual = 0, 0, 0, 0
site_rates.append({
'site_id': site_id,
'site_name': site_name,
'mu': mu,
'mu_tilde': mu_tilde,
'k_single': k_single,
'k_dual': k_dual,
'satisfaction_rate_r': r
})
df_site_rates = pd.DataFrame(site_rates)
df_site_rates = df_site_rates.sort_values('satisfaction_rate_r')
df_site_rates.to_excel(writer, sheet_name='site_satisfaction', index=False)
# Sheet 4: 配对风险分析
pair_risk = []
for idx, row in df_pairs.iterrows():
pair_key = (row['site_i_id'], row['site_j_id'])
k_dual = pair_k.get(pair_key, 0)
q_final = row['q_final']
mu_i, sigma_i = row['mu_i'], row['sigma_i']
mu_j, sigma_j = row['mu_j'], row['sigma_j']
p_i = shortfall_probability(q_final, mu_i, sigma_i, SHORTFALL_THRESHOLD)
p_j = shortfall_probability(Q - q_final, mu_j, sigma_j, SHORTFALL_THRESHOLD)
pair_risk.append({
'site_i_name': row['site_i_name'],
'site_j_name': row['site_j_name'],
'k_dual': k_dual,
'q_final': q_final,
'shortfall_prob_i': p_i,
'shortfall_prob_j': p_j,
'shortfall_prob_either': 1 - (1 - p_i) * (1 - p_j)
})
df_pair_risk = pd.DataFrame(pair_risk)
df_pair_risk = df_pair_risk.sort_values('shortfall_prob_either', ascending=False)
df_pair_risk.to_excel(writer, sheet_name='pair_risk', index=False)
print(f"\n结果已保存至: {OUTPUT_FILE}")
print(" - Sheet 'comparison': Task 1 vs Task 3 对比")
print(" - Sheet 'task3_metrics': Task 3 详细指标")
print(" - Sheet 'site_satisfaction': 站点满足率")
print(" - Sheet 'pair_risk': 配对风险分析")
print("\n" + "=" * 60)

BIN
task3/06_evaluate.xlsx Normal file

Binary file not shown.

View File

@@ -663,3 +663,80 @@ flowchart TB
| 未满足惩罚 | $\lambda$ | 0.5 | 敏感性分析 |
| 最大缺口惩罚 | $\eta$ | 0.3 | 敏感性分析 |
| 鲁棒性水平 | $k$ | 1 | 84%保护 |
| **合并比例** | $r_{merge}$ | 1/2 | 保留50%独立访问 |
---
## 附录D实现决策记录
### D.1 关键设计决策
| 决策项 | 选择 | 理由 |
|--------|------|------|
| 数据来源 | Task 1结果 (`task1/03_allocate.xlsx`) | 复用已验证的需求修正和频次分配 |
| 有效性衰减计算 | **总量计算** $q(\mu_i + \mu_j)$ | 双站点共享同一卡车,总负载决定服务质量 |
| 合并比例 | $k_{max} = \lfloor \min(k_i, k_j) / 2 \rfloor$ | 保留50%独立访问,平衡效率与风险 |
| 配对策略 | 每站点最多配对一次 | 简化实现,足以说明方法论 |
| 双站点访问计数 | 算1次访问事件 | 释放槽位给其他站点 |
### D.2 有效性衰减公式(总量计算)
双站点访问时,质量折扣因子按总服务量计算:
$$q_{ij} = \min\left(1, \frac{250}{\mu_i + \mu_j}\right)$$
**E2'的计算**
$$E_2' = \sum_{\text{单站点}} k_i \cdot q(\mu_i) \cdot \mu_i + \sum_{\text{双站点}} k_{ij} \cdot q(\mu_i + \mu_j) \cdot E[S_i + S_j]$$
### D.3 合并比例详述
对于配对 $(i, j)$,原频次为 $k_i, k_j$
| 项目 | 公式 |
|------|------|
| 双站点访问次数 | $k_{ij} = \lfloor \min(k_i, k_j) / 2 \rfloor$ |
| 站点i剩余单独访问 | $k_i' = k_i - k_{ij}$ |
| 站点j剩余单独访问 | $k_j' = k_j - k_{ij}$ |
| 释放的访问槽位 | $\Delta N = \sum k_{ij}$ |
---
## 附录E敏感性分析计划
### E.1 待分析参数
| 参数 | 基准值 | 扫描范围 | 预期影响 |
|------|--------|---------|---------|
| **合并比例** $r_{merge}$ | 1/2 | [1/3, 1/2, 2/3] | 配对数量、资源节省 |
| 距离阈值 $l_{max}$ | 50 mi | [30, 40, 50, 60, 70] | 可行配对数 |
| 容量上限 $\mu_i + \mu_j$ | 450 | [400, 425, 450, 475, 500] | 配对选择范围 |
| CV阈值 | 0.5 | [0.3, 0.4, 0.5, 0.6] | 配对稳定性 |
### E.2 敏感性分析输出
- 各参数对 E1', E2', F1', R1 的影响曲线
- 参数交互效应热力图
- 稳健性结论
---
## 附录F程序流水线
```
task3/
├── 01_distance.py # 距离矩阵计算
│ └── 01_distance.xlsx
├── 02_pairing.py # 配对筛选与选择
│ └── 02_pairing.xlsx
├── 03_allocation.py # 最优分配计算
│ └── 03_allocation.xlsx
├── 04_reschedule.py # 访问次数重分配
│ └── 04_reschedule.xlsx
├── 05_calendar.py # 日历排程生成
│ └── 05_calendar.xlsx
├── 06_evaluate.py # 效果评估
│ └── 06_evaluate.xlsx
├── 07_sensitivity.py # 敏感性分析(待实现)
│ └── 07_sensitivity.xlsx
└── figures/ # 可视化输出
```