diff --git a/task3/01_distance.py b/task3/01_distance.py new file mode 100644 index 0000000..fb8a012 --- /dev/null +++ b/task3/01_distance.py @@ -0,0 +1,119 @@ +""" +Task 3 - Step 1: 距离矩阵计算 +============================= + +输入: task1/03_allocate.xlsx (70个站点的坐标信息) +输出: task3/01_distance.xlsx (70×70距离矩阵 + 站点信息) + +距离计算公式 (Haversine简化): +l_ij = 69.0 * sqrt((lat_i - lat_j)^2 + cos^2(lat_avg * pi/180) * (lon_i - lon_j)^2) + +单位: 英里 +""" + +import pandas as pd +import numpy as np + +# ============================================ +# 参数设置 +# ============================================ +INPUT_FILE = '../task1/03_allocate.xlsx' +OUTPUT_FILE = '01_distance.xlsx' + +# ============================================ +# 读取数据 +# ============================================ +print("=" * 60) +print("Task 3 - Step 1: 距离矩阵计算") +print("=" * 60) + +df = pd.read_excel(INPUT_FILE) +print(f"\n读取站点数据: {len(df)} 个站点") +print(f"列名: {df.columns.tolist()}") + +# 提取关键列 +sites = df[['site_id', 'site_name', 'lat', 'lon', 'mu', 'sigma', 'mu_tilde', 'k']].copy() +print(f"\n站点数据概览:") +print(sites.head()) + +# ============================================ +# 距离计算函数 +# ============================================ +def calc_distance(lat1, lon1, lat2, lon2): + """ + 计算两点间的近似距离(英里) + 使用Haversine公式的简化版本,适用于小范围地域 + """ + lat_avg = (lat1 + lat2) / 2 + lat_avg_rad = np.radians(lat_avg) + + delta_lat = lat1 - lat2 + delta_lon = lon1 - lon2 + + # 69.0 miles per degree of latitude + # cos(lat) correction for longitude + distance = 69.0 * np.sqrt(delta_lat**2 + (np.cos(lat_avg_rad) * delta_lon)**2) + + return distance + +# ============================================ +# 构建距离矩阵 +# ============================================ +n = len(sites) +distance_matrix = np.zeros((n, n)) + +for i in range(n): + for j in range(n): + if i != j: + distance_matrix[i, j] = calc_distance( + sites.iloc[i]['lat'], sites.iloc[i]['lon'], + sites.iloc[j]['lat'], sites.iloc[j]['lon'] + ) + +# 转换为DataFrame +site_ids = sites['site_id'].values +df_distance = pd.DataFrame(distance_matrix, index=site_ids, columns=site_ids) + +# ============================================ +# 统计信息 +# ============================================ +# 提取上三角(排除对角线) +upper_tri = distance_matrix[np.triu_indices(n, k=1)] + +print(f"\n距离矩阵统计:") +print(f" - 站点对总数: {len(upper_tri)}") +print(f" - 最小距离: {upper_tri.min():.2f} 英里") +print(f" - 最大距离: {upper_tri.max():.2f} 英里") +print(f" - 平均距离: {upper_tri.mean():.2f} 英里") +print(f" - 中位数距离: {np.median(upper_tri):.2f} 英里") + +# 按阈值统计 +thresholds = [30, 40, 50, 60, 70] +print(f"\n距离阈值统计:") +for th in thresholds: + count = np.sum(upper_tri <= th) + print(f" - ≤ {th} 英里: {count} 对 ({count/len(upper_tri)*100:.1f}%)") + +# ============================================ +# 保存结果 +# ============================================ +with pd.ExcelWriter(OUTPUT_FILE, engine='openpyxl') as writer: + # Sheet 1: 站点信息 + sites.to_excel(writer, sheet_name='sites', index=False) + + # Sheet 2: 距离矩阵 + df_distance.to_excel(writer, sheet_name='distance_matrix') + + # Sheet 3: 距离统计 + stats = pd.DataFrame({ + 'metric': ['min', 'max', 'mean', 'median', 'std', 'total_pairs'], + 'value': [upper_tri.min(), upper_tri.max(), upper_tri.mean(), + np.median(upper_tri), upper_tri.std(), len(upper_tri)] + }) + stats.to_excel(writer, sheet_name='statistics', index=False) + +print(f"\n结果已保存至: {OUTPUT_FILE}") +print(" - Sheet 'sites': 站点信息") +print(" - Sheet 'distance_matrix': 70×70距离矩阵") +print(" - Sheet 'statistics': 距离统计") +print("\n" + "=" * 60) diff --git a/task3/01_distance.xlsx b/task3/01_distance.xlsx new file mode 100644 index 0000000..64afdc6 Binary files /dev/null and b/task3/01_distance.xlsx differ diff --git a/task3/02_pairing.py b/task3/02_pairing.py new file mode 100644 index 0000000..f5100e2 --- /dev/null +++ b/task3/02_pairing.py @@ -0,0 +1,209 @@ +""" +Task 3 - Step 2: 共生站点配对筛选与选择 +======================================= + +输入: 01_distance.xlsx (距离矩阵 + 站点信息) +输出: 02_pairing.xlsx (筛选后的配对列表 + 最终选择) + +配对筛选条件: +1. 距离约束: l_ij ≤ l_max (50英里) +2. 容量约束: μ_i + μ_j ≤ 450 +3. 稳定性约束: CV_i ≤ 0.5 且 CV_j ≤ 0.5 + +配对价值函数: +V_ij = α * (μ_i + μ_j) / Q - β * l_ij / l_max - γ * (σ_i² + σ_j²) / (μ_i + μ_j)² + +配对选择: 贪心算法,每个站点最多配对一次 +""" + +import pandas as pd +import numpy as np + +# ============================================ +# 参数设置 +# ============================================ +INPUT_FILE = '01_distance.xlsx' +OUTPUT_FILE = '02_pairing.xlsx' + +# 约束参数 +L_MAX = 50 # 距离阈值 (英里) +MU_SUM_MAX = 450 # 需求和上限 +CV_MAX = 0.5 # 变异系数上限 +Q = 400 # 卡车容量 + +# 价值函数权重 +ALPHA = 1.0 # 容量利用率权重 +BETA = 0.3 # 距离惩罚权重 +GAMMA = 0.5 # 风险惩罚权重 + +# ============================================ +# 读取数据 +# ============================================ +print("=" * 60) +print("Task 3 - Step 2: 共生站点配对筛选与选择") +print("=" * 60) + +# 读取站点信息 +sites = pd.read_excel(INPUT_FILE, sheet_name='sites') +print(f"\n读取站点数据: {len(sites)} 个站点") + +# 读取距离矩阵 +dist_matrix = pd.read_excel(INPUT_FILE, sheet_name='distance_matrix', index_col=0) +print(f"读取距离矩阵: {dist_matrix.shape}") + +# 计算变异系数 +sites['cv'] = sites['sigma'] / sites['mu'] +print(f"\nCV范围: [{sites['cv'].min():.3f}, {sites['cv'].max():.3f}]") +print(f"CV > {CV_MAX} 的站点数: {(sites['cv'] > CV_MAX).sum()}") + +# ============================================ +# 配对筛选 +# ============================================ +print(f"\n" + "-" * 40) +print("配对筛选") +print("-" * 40) + +candidates = [] +n = len(sites) + +for i in range(n): + for j in range(i + 1, n): # 只考虑上三角 + site_i = sites.iloc[i] + site_j = sites.iloc[j] + + # 获取距离 + dist = dist_matrix.iloc[i, j] + + # 约束检查 + # 1. 距离约束 + if dist > L_MAX: + continue + + # 2. 容量约束 + mu_sum = site_i['mu'] + site_j['mu'] + if mu_sum > MU_SUM_MAX: + continue + + # 3. 稳定性约束 + if site_i['cv'] > CV_MAX or site_j['cv'] > CV_MAX: + continue + + # 通过所有约束,计算配对价值 + sigma_sq_sum = site_i['sigma']**2 + site_j['sigma']**2 + + # 价值函数 + value = (ALPHA * mu_sum / Q + - BETA * dist / L_MAX + - GAMMA * sigma_sq_sum / mu_sum**2) + + candidates.append({ + 'site_i_id': site_i['site_id'], + 'site_j_id': site_j['site_id'], + 'site_i_name': site_i['site_name'], + 'site_j_name': site_j['site_name'], + 'distance': dist, + 'mu_i': site_i['mu'], + 'mu_j': site_j['mu'], + 'mu_sum': mu_sum, + 'sigma_i': site_i['sigma'], + 'sigma_j': site_j['sigma'], + 'cv_i': site_i['cv'], + 'cv_j': site_j['cv'], + 'k_i': site_i['k'], + 'k_j': site_j['k'], + 'value': value + }) + +df_candidates = pd.DataFrame(candidates) +print(f"通过约束的候选配对数: {len(df_candidates)}") + +if len(df_candidates) > 0: + print(f"配对价值范围: [{df_candidates['value'].min():.3f}, {df_candidates['value'].max():.3f}]") + print(f"平均距离: {df_candidates['distance'].mean():.2f} 英里") + print(f"平均需求和: {df_candidates['mu_sum'].mean():.1f}") + +# ============================================ +# 贪心配对选择 +# ============================================ +print(f"\n" + "-" * 40) +print("贪心配对选择") +print("-" * 40) + +# 按价值降序排序 +df_candidates_sorted = df_candidates.sort_values('value', ascending=False).reset_index(drop=True) + +# 贪心选择 +selected_pairs = [] +used_sites = set() + +for _, row in df_candidates_sorted.iterrows(): + site_i = row['site_i_id'] + site_j = row['site_j_id'] + + # 检查是否已被使用 + if site_i not in used_sites and site_j not in used_sites: + selected_pairs.append(row.to_dict()) + used_sites.add(site_i) + used_sites.add(site_j) + +df_selected = pd.DataFrame(selected_pairs) +print(f"最终选择配对数: {len(df_selected)}") +print(f"涉及站点数: {len(used_sites)} (占总站点 {len(used_sites)/n*100:.1f}%)") +print(f"未配对站点数: {n - len(used_sites)}") + +if len(df_selected) > 0: + print(f"\n选中配对的价值范围: [{df_selected['value'].min():.3f}, {df_selected['value'].max():.3f}]") + print(f"选中配对的距离范围: [{df_selected['distance'].min():.2f}, {df_selected['distance'].max():.2f}] 英里") + print(f"选中配对的需求和范围: [{df_selected['mu_sum'].min():.1f}, {df_selected['mu_sum'].max():.1f}]") + +# ============================================ +# 显示选中的配对 +# ============================================ +print(f"\n" + "-" * 40) +print("选中的配对列表") +print("-" * 40) + +if len(df_selected) > 0: + display_cols = ['site_i_name', 'site_j_name', 'distance', 'mu_sum', 'k_i', 'k_j', 'value'] + print(df_selected[display_cols].to_string(index=False)) + +# ============================================ +# 保存结果 +# ============================================ +with pd.ExcelWriter(OUTPUT_FILE, engine='openpyxl') as writer: + # Sheet 1: 站点信息(含CV) + sites.to_excel(writer, sheet_name='sites', index=False) + + # Sheet 2: 所有候选配对 + df_candidates_sorted.to_excel(writer, sheet_name='all_candidates', index=False) + + # Sheet 3: 最终选择的配对 + df_selected.to_excel(writer, sheet_name='selected_pairs', index=False) + + # Sheet 4: 参数记录 + params = pd.DataFrame({ + 'parameter': ['L_MAX', 'MU_SUM_MAX', 'CV_MAX', 'Q', 'ALPHA', 'BETA', 'GAMMA'], + 'value': [L_MAX, MU_SUM_MAX, CV_MAX, Q, ALPHA, BETA, GAMMA], + 'description': ['距离阈值(英里)', '需求和上限', 'CV上限', '卡车容量', + '容量利用率权重', '距离惩罚权重', '风险惩罚权重'] + }) + params.to_excel(writer, sheet_name='parameters', index=False) + + # Sheet 5: 汇总统计 + summary = pd.DataFrame({ + 'metric': ['total_sites', 'candidate_pairs', 'selected_pairs', + 'paired_sites', 'unpaired_sites', 'avg_distance', 'avg_mu_sum'], + 'value': [n, len(df_candidates), len(df_selected), + len(used_sites), n - len(used_sites), + df_selected['distance'].mean() if len(df_selected) > 0 else 0, + df_selected['mu_sum'].mean() if len(df_selected) > 0 else 0] + }) + summary.to_excel(writer, sheet_name='summary', index=False) + +print(f"\n结果已保存至: {OUTPUT_FILE}") +print(" - Sheet 'sites': 站点信息(含CV)") +print(" - Sheet 'all_candidates': 所有候选配对") +print(" - Sheet 'selected_pairs': 最终选择的配对") +print(" - Sheet 'parameters': 参数记录") +print(" - Sheet 'summary': 汇总统计") +print("\n" + "=" * 60) diff --git a/task3/02_pairing.xlsx b/task3/02_pairing.xlsx new file mode 100644 index 0000000..e8bd422 Binary files /dev/null and b/task3/02_pairing.xlsx differ diff --git a/task3/03_allocation.py b/task3/03_allocation.py new file mode 100644 index 0000000..112b367 --- /dev/null +++ b/task3/03_allocation.py @@ -0,0 +1,220 @@ +""" +Task 3 - Step 3: 第一站点最优分配计算 +===================================== + +输入: 02_pairing.xlsx (选中的配对列表) +输出: 03_allocation.xlsx (含最优分配量q*的配对列表) + +最优分配公式: +q* = (σ_j * μ_i + σ_i * (Q - μ_j)) / (σ_i + σ_j) + +鲁棒性约束: +q_final = clip(q*, μ_i - σ_i, Q - μ_j + σ_j) + +物理意义: +- 波动大的站点需要更多"缓冲" +- 当σ_j >> σ_i时,q* → μ_i(精确分配给站点i) +- 当σ_i >> σ_j时,q* → Q - μ_j(为站点j预留更多) +""" + +import pandas as pd +import numpy as np +from scipy import stats + +# ============================================ +# 参数设置 +# ============================================ +INPUT_FILE = '02_pairing.xlsx' +OUTPUT_FILE = '03_allocation.xlsx' + +Q = 400 # 卡车容量 +K_ROBUST = 1 # 鲁棒性水平 (84%保护) + +# ============================================ +# 读取数据 +# ============================================ +print("=" * 60) +print("Task 3 - Step 3: 第一站点最优分配计算") +print("=" * 60) + +# 读取选中的配对 +df_pairs = pd.read_excel(INPUT_FILE, sheet_name='selected_pairs') +print(f"\n读取配对数据: {len(df_pairs)} 对") + +# ============================================ +# 计算最优分配 +# ============================================ +print(f"\n" + "-" * 40) +print("计算最优分配 q*") +print("-" * 40) + +def optimal_allocation(mu_i, sigma_i, mu_j, sigma_j, Q=400): + """ + 计算双站点访问时第一站点的最优分配量 + + 公式: q* = (σ_j * μ_i + σ_i * (Q - μ_j)) / (σ_i + σ_j) + """ + if sigma_i + sigma_j == 0: + # 边界情况:两站点都无波动 + return mu_i + + q_star = (sigma_j * mu_i + sigma_i * (Q - mu_j)) / (sigma_i + sigma_j) + return q_star + + +def robust_allocation(q_star, mu_i, sigma_i, mu_j, sigma_j, Q=400, k=1): + """ + 应用鲁棒性约束 + + 约束: μ_i - k*σ_i ≤ q ≤ Q - μ_j + k*σ_j + """ + q_lower = max(0, mu_i - k * sigma_i) + q_upper = min(Q, Q - mu_j + k * sigma_j) + + # 确保上下界合理 + if q_lower > q_upper: + # 如果约束冲突,取中点 + q_final = (q_lower + q_upper) / 2 + else: + q_final = np.clip(q_star, q_lower, q_upper) + + return q_final, q_lower, q_upper + + +def expected_service(q, mu, sigma): + """ + 计算截断期望 E[min(D, q)] + + E[min(D, q)] = μ * Φ(z) - σ * φ(z) + q * (1 - Φ(z)) + where z = (q - μ) / σ + + 当q > μ时,大部分时候D < q,期望接近μ + 当q < μ时,经常D > q,期望接近q但略低 + """ + if sigma == 0: + return min(mu, q) + + z = (q - mu) / sigma + phi_z = stats.norm.pdf(z) + Phi_z = stats.norm.cdf(z) + + return mu * Phi_z - sigma * phi_z + q * (1 - Phi_z) + + +# 计算每个配对的分配 +results = [] + +for idx, row in df_pairs.iterrows(): + mu_i = row['mu_i'] + sigma_i = row['sigma_i'] + mu_j = row['mu_j'] + sigma_j = row['sigma_j'] + + # 计算最优分配 + q_star = optimal_allocation(mu_i, sigma_i, mu_j, sigma_j, Q) + + # 应用鲁棒性约束 + q_final, q_lower, q_upper = robust_allocation( + q_star, mu_i, sigma_i, mu_j, sigma_j, Q, K_ROBUST + ) + + # 计算期望服务量 + E_Si = expected_service(q_final, mu_i, sigma_i) + E_Sj = expected_service(Q - q_final, mu_j, sigma_j) + E_total = E_Si + E_Sj + + # 计算分配比例 + alloc_ratio = q_final / Q if Q > 0 else 0 + + results.append({ + **row.to_dict(), + 'q_star': q_star, + 'q_lower': q_lower, + 'q_upper': q_upper, + 'q_final': q_final, + 'q_ratio': alloc_ratio, + 'E_Si': E_Si, + 'E_Sj': E_Sj, + 'E_total': E_total, + 'efficiency': E_total / (mu_i + mu_j) if (mu_i + mu_j) > 0 else 0 + }) + +df_allocation = pd.DataFrame(results) + +# ============================================ +# 统计信息 +# ============================================ +print(f"\n分配统计:") +print(f" - q* 范围: [{df_allocation['q_star'].min():.1f}, {df_allocation['q_star'].max():.1f}]") +print(f" - q_final 范围: [{df_allocation['q_final'].min():.1f}, {df_allocation['q_final'].max():.1f}]") +print(f" - 分配比例范围: [{df_allocation['q_ratio'].min():.2%}, {df_allocation['q_ratio'].max():.2%}]") +print(f" - 平均分配比例: {df_allocation['q_ratio'].mean():.2%}") + +print(f"\n期望服务量统计:") +print(f" - E[S_i] 范围: [{df_allocation['E_Si'].min():.1f}, {df_allocation['E_Si'].max():.1f}]") +print(f" - E[S_j] 范围: [{df_allocation['E_Sj'].min():.1f}, {df_allocation['E_Sj'].max():.1f}]") +print(f" - E[total] 范围: [{df_allocation['E_total'].min():.1f}, {df_allocation['E_total'].max():.1f}]") +print(f" - 平均效率: {df_allocation['efficiency'].mean():.2%}") + +# 检查是否有被约束裁剪的情况 +clipped_lower = (df_allocation['q_final'] <= df_allocation['q_lower'] + 0.01).sum() +clipped_upper = (df_allocation['q_final'] >= df_allocation['q_upper'] - 0.01).sum() +print(f"\n鲁棒性约束影响:") +print(f" - 触及下界: {clipped_lower} 对") +print(f" - 触及上界: {clipped_upper} 对") +print(f" - 未被裁剪: {len(df_allocation) - clipped_lower - clipped_upper} 对") + +# ============================================ +# 显示关键配对的分配 +# ============================================ +print(f"\n" + "-" * 40) +print("高价值配对的分配方案 (Top 10)") +print("-" * 40) + +display_cols = ['site_i_name', 'site_j_name', 'mu_i', 'mu_j', 'q_final', 'E_Si', 'E_Sj', 'E_total'] +df_top = df_allocation.nlargest(10, 'value')[display_cols].copy() +df_top['site_i_name'] = df_top['site_i_name'].str[:25] +df_top['site_j_name'] = df_top['site_j_name'].str[:25] +print(df_top.to_string(index=False)) + +# ============================================ +# 保存结果 +# ============================================ +with pd.ExcelWriter(OUTPUT_FILE, engine='openpyxl') as writer: + # Sheet 1: 完整分配结果 + df_allocation.to_excel(writer, sheet_name='allocation', index=False) + + # Sheet 2: 简化视图 + simple_cols = ['site_i_id', 'site_j_id', 'site_i_name', 'site_j_name', + 'mu_i', 'mu_j', 'sigma_i', 'sigma_j', 'k_i', 'k_j', + 'q_final', 'E_Si', 'E_Sj', 'E_total', 'value'] + df_allocation[simple_cols].to_excel(writer, sheet_name='allocation_simple', index=False) + + # Sheet 3: 参数记录 + params = pd.DataFrame({ + 'parameter': ['Q', 'K_ROBUST'], + 'value': [Q, K_ROBUST], + 'description': ['卡车容量', '鲁棒性水平(k*σ)'] + }) + params.to_excel(writer, sheet_name='parameters', index=False) + + # Sheet 4: 汇总统计 + summary = pd.DataFrame({ + 'metric': ['total_pairs', 'avg_q_ratio', 'avg_E_total', 'avg_efficiency', + 'min_E_total', 'max_E_total', 'clipped_lower', 'clipped_upper'], + 'value': [len(df_allocation), + df_allocation['q_ratio'].mean(), + df_allocation['E_total'].mean(), + df_allocation['efficiency'].mean(), + df_allocation['E_total'].min(), + df_allocation['E_total'].max(), + clipped_lower, clipped_upper] + }) + summary.to_excel(writer, sheet_name='summary', index=False) + +print(f"\n结果已保存至: {OUTPUT_FILE}") +print(" - Sheet 'allocation': 完整分配结果") +print(" - Sheet 'allocation_simple': 简化视图") +print(" - Sheet 'parameters': 参数记录") +print(" - Sheet 'summary': 汇总统计") +print("\n" + "=" * 60) diff --git a/task3/03_allocation.xlsx b/task3/03_allocation.xlsx new file mode 100644 index 0000000..216b5ea Binary files /dev/null and b/task3/03_allocation.xlsx differ diff --git a/task3/04_reschedule.py b/task3/04_reschedule.py new file mode 100644 index 0000000..1d8e493 --- /dev/null +++ b/task3/04_reschedule.py @@ -0,0 +1,237 @@ +""" +Task 3 - Step 4: 访问次数重分配 +================================ + +输入: + - 03_allocation.xlsx (配对及分配方案) + - ../task1/03_allocate.xlsx (原始频次分配) + +输出: 04_reschedule.xlsx (重分配后的访问次数) + +重分配逻辑: +1. 对于每个配对(i,j),计算双站点访问次数: k_ij = floor(min(k_i, k_j) / 2) +2. 更新单独访问次数: k'_i = k_i - k_ij, k'_j = k_j - k_ij +3. 释放的槽位: ΔN = Σ k_ij +4. 将ΔN按需求比例分配给所有站点 + +约束: +- 双站点访问算1次"访问事件" +- 每天仍然2次访问事件(单站点或双站点) +- 总访问事件数 = 730 +""" + +import pandas as pd +import numpy as np + +# ============================================ +# 参数设置 +# ============================================ +ALLOCATION_FILE = '03_allocation.xlsx' +TASK1_FILE = '../task1/03_allocate.xlsx' +OUTPUT_FILE = '04_reschedule.xlsx' + +MERGE_RATIO = 0.5 # 合并比例: min(k_i, k_j) * ratio +TOTAL_EVENTS = 730 # 年度总访问事件数 + +# ============================================ +# 读取数据 +# ============================================ +print("=" * 60) +print("Task 3 - Step 4: 访问次数重分配") +print("=" * 60) + +# 读取配对数据 +df_pairs = pd.read_excel(ALLOCATION_FILE, sheet_name='allocation') +print(f"\n读取配对数据: {len(df_pairs)} 对") + +# 读取原始分配 +df_original = pd.read_excel(TASK1_FILE) +print(f"读取原始分配: {len(df_original)} 个站点") +print(f"原始总访问次数: {df_original['k'].sum()}") + +# ============================================ +# 计算双站点访问次数 +# ============================================ +print(f"\n" + "-" * 40) +print("计算双站点访问次数") +print("-" * 40) + +# 创建站点访问信息的副本 +sites = df_original[['site_id', 'site_name', 'mu', 'sigma', 'mu_tilde', 'k']].copy() +sites['k_original'] = sites['k'] +sites['k_single'] = sites['k'] # 将被更新 +sites['k_dual'] = 0 # 作为配对中的一员参与双站点访问的次数 +sites['is_paired'] = False +sites['pair_partner'] = None + +# 记录配对信息 +pair_visits = [] +paired_sites = set() + +for idx, row in df_pairs.iterrows(): + site_i = row['site_i_id'] + site_j = row['site_j_id'] + k_i = row['k_i'] + k_j = row['k_j'] + + # 计算双站点访问次数 + k_ij = int(min(k_i, k_j) * MERGE_RATIO) + + # 确保至少保留1次单独访问 + if k_ij >= min(k_i, k_j): + k_ij = min(k_i, k_j) - 1 + + if k_ij < 1: + k_ij = 0 # 如果无法合并,跳过 + + # 更新单独访问次数 + k_i_single = k_i - k_ij + k_j_single = k_j - k_ij + + # 更新sites表 + sites.loc[sites['site_id'] == site_i, 'k_single'] = k_i_single + sites.loc[sites['site_id'] == site_i, 'k_dual'] = k_ij + sites.loc[sites['site_id'] == site_i, 'is_paired'] = True + sites.loc[sites['site_id'] == site_i, 'pair_partner'] = site_j + + sites.loc[sites['site_id'] == site_j, 'k_single'] = k_j_single + sites.loc[sites['site_id'] == site_j, 'k_dual'] = k_ij + sites.loc[sites['site_id'] == site_j, 'is_paired'] = True + sites.loc[sites['site_id'] == site_j, 'pair_partner'] = site_i + + paired_sites.add(site_i) + paired_sites.add(site_j) + + pair_visits.append({ + 'pair_id': idx + 1, + 'site_i_id': site_i, + 'site_j_id': site_j, + 'site_i_name': row['site_i_name'], + 'site_j_name': row['site_j_name'], + 'k_i_original': k_i, + 'k_j_original': k_j, + 'k_dual': k_ij, + 'k_i_single': k_i_single, + 'k_j_single': k_j_single, + 'q_final': row['q_final'], + 'E_total': row['E_total'] + }) + +df_pair_visits = pd.DataFrame(pair_visits) + +# ============================================ +# 计算释放的槽位和重分配 +# ============================================ +print(f"\n" + "-" * 40) +print("计算释放槽位") +print("-" * 40) + +# 当前访问事件统计 +total_single = sites['k_single'].sum() +total_dual = df_pair_visits['k_dual'].sum() +total_events_current = total_single + total_dual + +print(f"单站点访问次数: {total_single}") +print(f"双站点访问次数: {total_dual}") +print(f"当前总访问事件: {total_events_current}") + +# 需要填补的槽位 +delta_N = TOTAL_EVENTS - total_events_current +print(f"需要填补的槽位: {delta_N}") + +# 按需求比例重分配 +if delta_N > 0: + print(f"\n重分配 {delta_N} 次额外访问...") + + # 计算每个站点的需求权重 + total_demand = sites['mu_tilde'].sum() + sites['demand_weight'] = sites['mu_tilde'] / total_demand + + # 按比例分配(使用Hamilton方法) + sites['k_extra_float'] = delta_N * sites['demand_weight'] + sites['k_extra'] = sites['k_extra_float'].apply(np.floor).astype(int) + + # 处理余数 + remainder = delta_N - sites['k_extra'].sum() + if remainder > 0: + # 按小数部分降序分配余数 + fractional = sites['k_extra_float'] - sites['k_extra'] + top_indices = fractional.nlargest(int(remainder)).index + sites.loc[top_indices, 'k_extra'] += 1 + + # 更新最终单站点访问次数 + sites['k_single_final'] = sites['k_single'] + sites['k_extra'] +else: + sites['k_extra'] = 0 + sites['k_single_final'] = sites['k_single'] + +# ============================================ +# 验证和统计 +# ============================================ +print(f"\n" + "-" * 40) +print("验证和统计") +print("-" * 40) + +# 最终统计 +final_single = sites['k_single_final'].sum() +final_dual = df_pair_visits['k_dual'].sum() +final_total = final_single + final_dual + +print(f"最终单站点访问: {final_single}") +print(f"最终双站点访问: {final_dual}") +print(f"最终总访问事件: {final_total}") +print(f"目标访问事件: {TOTAL_EVENTS}") +print(f"差异: {final_total - TOTAL_EVENTS}") + +# 站点级别统计 +print(f"\n站点访问次数变化:") +sites['k_total_final'] = sites['k_single_final'] + sites['k_dual'] +print(f" - 原始k范围: [{sites['k_original'].min()}, {sites['k_original'].max()}]") +print(f" - 最终k范围: [{sites['k_total_final'].min()}, {sites['k_total_final'].max()}]") +print(f" - 额外分配范围: [{sites['k_extra'].min()}, {sites['k_extra'].max()}]") + +# 配对统计 +if len(df_pair_visits) > 0: + print(f"\n配对访问统计:") + print(f" - 配对数: {len(df_pair_visits)}") + print(f" - 双站点访问总次数: {df_pair_visits['k_dual'].sum()}") + print(f" - 每对双站点访问范围: [{df_pair_visits['k_dual'].min()}, {df_pair_visits['k_dual'].max()}]") + +# ============================================ +# 保存结果 +# ============================================ +with pd.ExcelWriter(OUTPUT_FILE, engine='openpyxl') as writer: + # Sheet 1: 站点最终访问次数 + sites_output = sites[['site_id', 'site_name', 'mu', 'sigma', 'mu_tilde', + 'k_original', 'k_single', 'k_extra', 'k_single_final', + 'k_dual', 'k_total_final', 'is_paired', 'pair_partner']] + sites_output.to_excel(writer, sheet_name='sites_schedule', index=False) + + # Sheet 2: 配对访问明细 + df_pair_visits.to_excel(writer, sheet_name='pair_visits', index=False) + + # Sheet 3: 参数记录 + params = pd.DataFrame({ + 'parameter': ['MERGE_RATIO', 'TOTAL_EVENTS', 'delta_N'], + 'value': [MERGE_RATIO, TOTAL_EVENTS, delta_N], + 'description': ['合并比例', '年度总访问事件', '额外分配次数'] + }) + params.to_excel(writer, sheet_name='parameters', index=False) + + # Sheet 4: 汇总统计 + summary = pd.DataFrame({ + 'metric': ['total_sites', 'paired_sites', 'unpaired_sites', + 'total_pairs', 'total_dual_visits', 'total_single_visits', + 'total_events', 'original_total_visits'], + 'value': [len(sites), len(paired_sites), len(sites) - len(paired_sites), + len(df_pair_visits), final_dual, final_single, + final_total, sites['k_original'].sum()] + }) + summary.to_excel(writer, sheet_name='summary', index=False) + +print(f"\n结果已保存至: {OUTPUT_FILE}") +print(" - Sheet 'sites_schedule': 站点最终访问次数") +print(" - Sheet 'pair_visits': 配对访问明细") +print(" - Sheet 'parameters': 参数记录") +print(" - Sheet 'summary': 汇总统计") +print("\n" + "=" * 60) diff --git a/task3/04_reschedule.xlsx b/task3/04_reschedule.xlsx new file mode 100644 index 0000000..4f15d64 Binary files /dev/null and b/task3/04_reschedule.xlsx differ diff --git a/task3/05_calendar.py b/task3/05_calendar.py new file mode 100644 index 0000000..db3168b --- /dev/null +++ b/task3/05_calendar.py @@ -0,0 +1,332 @@ +""" +Task 3 - Step 5: 日历排程生成 +============================= + +输入: + - 04_reschedule.xlsx (站点访问次数 + 配对访问明细) + +输出: 05_calendar.xlsx (365天的完整排程) + +排程逻辑: +1. 生成所有访问事件(单站点 + 双站点) +2. 为每个事件计算理想日期(均匀分布) +3. 贪心分配到365天,每天2个事件槽位 +4. 优先安排双站点访问(时间较长) + +约束: +- 每天恰好2个访问事件 +- 同一站点的访问尽量均匀分布 +""" + +import pandas as pd +import numpy as np +from collections import defaultdict + +# ============================================ +# 参数设置 +# ============================================ +INPUT_FILE = '04_reschedule.xlsx' +OUTPUT_FILE = '05_calendar.xlsx' + +DAYS_PER_YEAR = 365 +EVENTS_PER_DAY = 2 +TOTAL_EVENTS = DAYS_PER_YEAR * EVENTS_PER_DAY # 730 + +# ============================================ +# 读取数据 +# ============================================ +print("=" * 60) +print("Task 3 - Step 5: 日历排程生成") +print("=" * 60) + +# 读取站点访问次数 +df_sites = pd.read_excel(INPUT_FILE, sheet_name='sites_schedule') +print(f"\n读取站点数据: {len(df_sites)} 个站点") + +# 读取配对访问 +df_pairs = pd.read_excel(INPUT_FILE, sheet_name='pair_visits') +print(f"读取配对数据: {len(df_pairs)} 对") + +# ============================================ +# 生成访问事件 +# ============================================ +print(f"\n" + "-" * 40) +print("生成访问事件") +print("-" * 40) + +events = [] +event_id = 0 + +# 1. 生成单站点访问事件 +for _, row in df_sites.iterrows(): + site_id = row['site_id'] + site_name = row['site_name'] + k_single = int(row['k_single_final']) + + for visit_num in range(k_single): + # 计算理想日期(均匀分布) + ideal_day = (visit_num + 0.5) * DAYS_PER_YEAR / k_single + + events.append({ + 'event_id': event_id, + 'event_type': 'single', + 'site_i_id': site_id, + 'site_j_id': None, + 'site_i_name': site_name, + 'site_j_name': None, + 'visit_num': visit_num + 1, + 'total_visits': k_single, + 'ideal_day': ideal_day, + 'priority': 0 # 单站点优先级较低 + }) + event_id += 1 + +# 2. 生成双站点访问事件 +for _, row in df_pairs.iterrows(): + site_i_id = row['site_i_id'] + site_j_id = row['site_j_id'] + site_i_name = row['site_i_name'] + site_j_name = row['site_j_name'] + k_dual = int(row['k_dual']) + + for visit_num in range(k_dual): + ideal_day = (visit_num + 0.5) * DAYS_PER_YEAR / k_dual + + events.append({ + 'event_id': event_id, + 'event_type': 'dual', + 'site_i_id': site_i_id, + 'site_j_id': site_j_id, + 'site_i_name': site_i_name, + 'site_j_name': site_j_name, + 'visit_num': visit_num + 1, + 'total_visits': k_dual, + 'ideal_day': ideal_day, + 'priority': 1 # 双站点优先级较高 + }) + event_id += 1 + +df_events = pd.DataFrame(events) +print(f"总访问事件数: {len(df_events)}") +print(f" - 单站点: {(df_events['event_type'] == 'single').sum()}") +print(f" - 双站点: {(df_events['event_type'] == 'dual').sum()}") + +# ============================================ +# 贪心排程算法 +# ============================================ +print(f"\n" + "-" * 40) +print("执行贪心排程") +print("-" * 40) + +# 按理想日期和优先级排序 +df_events_sorted = df_events.sort_values( + ['ideal_day', 'priority'], + ascending=[True, False] +).reset_index(drop=True) + +# 初始化日历槽位 +calendar = {day: [] for day in range(1, DAYS_PER_YEAR + 1)} + +# 记录每个站点最后访问的日期 +last_visit = defaultdict(lambda: -float('inf')) + +# 贪心分配 +assigned_day = [] + +for idx, event in df_events_sorted.iterrows(): + ideal = event['ideal_day'] + site_i = event['site_i_id'] + site_j = event['site_j_id'] + + # 寻找最佳可用日期 + best_day = None + best_score = float('inf') + + # 搜索范围:理想日期附近 + search_start = max(1, int(ideal) - 30) + search_end = min(DAYS_PER_YEAR, int(ideal) + 30) + + for day in range(search_start, search_end + 1): + # 检查槽位是否可用 + if len(calendar[day]) >= EVENTS_PER_DAY: + continue + + # 检查同一站点是否已在当天访问 + sites_on_day = set() + for e in calendar[day]: + sites_on_day.add(e['site_i_id']) + if e['site_j_id'] is not None: + sites_on_day.add(e['site_j_id']) + + if site_i in sites_on_day: + continue + if site_j is not None and site_j in sites_on_day: + continue + + # 计算得分(理想日期偏差 + 间隔惩罚) + day_diff = abs(day - ideal) + + # 间隔惩罚:鼓励与上次访问保持距离 + min_gap = min( + day - last_visit[site_i], + day - last_visit[site_j] if site_j is not None else float('inf') + ) + gap_penalty = max(0, 7 - min_gap) * 2 # 7天内再次访问有惩罚 + + score = day_diff + gap_penalty + + if score < best_score: + best_score = score + best_day = day + + # 如果附近没找到,扩大搜索范围 + if best_day is None: + for day in range(1, DAYS_PER_YEAR + 1): + if len(calendar[day]) < EVENTS_PER_DAY: + sites_on_day = set() + for e in calendar[day]: + sites_on_day.add(e['site_i_id']) + if e['site_j_id'] is not None: + sites_on_day.add(e['site_j_id']) + + if site_i not in sites_on_day: + if site_j is None or site_j not in sites_on_day: + best_day = day + break + + if best_day is None: + print(f"警告: 无法分配事件 {event['event_id']}") + best_day = 1 # 强制分配 + + # 分配到日历 + calendar[best_day].append(event.to_dict()) + assigned_day.append(best_day) + + # 更新最后访问日期 + last_visit[site_i] = best_day + if site_j is not None: + last_visit[site_j] = best_day + +df_events_sorted['assigned_day'] = assigned_day + +# ============================================ +# 生成日历视图 +# ============================================ +print(f"\n" + "-" * 40) +print("生成日历视图") +print("-" * 40) + +calendar_rows = [] +for day in range(1, DAYS_PER_YEAR + 1): + events_on_day = calendar[day] + + row = {'day': day} + + for slot in range(EVENTS_PER_DAY): + if slot < len(events_on_day): + e = events_on_day[slot] + row[f'slot_{slot+1}_type'] = e['event_type'] + row[f'slot_{slot+1}_site_i'] = e['site_i_id'] + row[f'slot_{slot+1}_site_j'] = e['site_j_id'] + row[f'slot_{slot+1}_name_i'] = e['site_i_name'] + row[f'slot_{slot+1}_name_j'] = e['site_j_name'] + else: + row[f'slot_{slot+1}_type'] = None + row[f'slot_{slot+1}_site_i'] = None + row[f'slot_{slot+1}_site_j'] = None + row[f'slot_{slot+1}_name_i'] = None + row[f'slot_{slot+1}_name_j'] = None + + calendar_rows.append(row) + +df_calendar = pd.DataFrame(calendar_rows) + +# ============================================ +# 统计和验证 +# ============================================ +print(f"\n排程统计:") + +# 每天事件数 +events_per_day = [len(calendar[d]) for d in range(1, DAYS_PER_YEAR + 1)] +print(f" - 每天事件数: min={min(events_per_day)}, max={max(events_per_day)}, avg={np.mean(events_per_day):.2f}") + +# 理想日期偏差 +df_events_sorted['day_diff'] = abs(df_events_sorted['assigned_day'] - df_events_sorted['ideal_day']) +print(f" - 理想日期偏差: avg={df_events_sorted['day_diff'].mean():.2f}, max={df_events_sorted['day_diff'].max():.0f}") + +# 访问间隔分析 +print(f"\n访问间隔分析:") + +site_visits = defaultdict(list) +for _, event in df_events_sorted.iterrows(): + site_visits[event['site_i_id']].append(event['assigned_day']) + # 只有双站点访问才有site_j_id + if event['site_j_id'] is not None and not (isinstance(event['site_j_id'], float) and np.isnan(event['site_j_id'])): + site_visits[event['site_j_id']].append(event['assigned_day']) + +gaps = [] +for site_id, days in site_visits.items(): + days_sorted = sorted(days) + for i in range(1, len(days_sorted)): + gaps.append(days_sorted[i] - days_sorted[i-1]) + +if gaps: + print(f" - 间隔范围: [{min(gaps)}, {max(gaps)}] 天") + print(f" - 平均间隔: {np.mean(gaps):.1f} 天") + print(f" - 中位数间隔: {np.median(gaps):.1f} 天") + +# ============================================ +# 保存结果 +# ============================================ +with pd.ExcelWriter(OUTPUT_FILE, engine='openpyxl') as writer: + # Sheet 1: 日历视图 + df_calendar.to_excel(writer, sheet_name='calendar', index=False) + + # Sheet 2: 事件详情(含分配日期) + df_events_sorted.to_excel(writer, sheet_name='events', index=False) + + # Sheet 3: 站点访问日期汇总 + site_schedule = [] + for site_id, days in site_visits.items(): + # 跳过None/NaN值 + if site_id is None or (isinstance(site_id, float) and np.isnan(site_id)): + continue + # 处理可能的类型不匹配 + site_row = df_sites[df_sites['site_id'] == int(site_id)] + if len(site_row) > 0: + site_name = site_row['site_name'].values[0] + else: + site_name = f"Site_{site_id}" + days_sorted = sorted(days) + site_schedule.append({ + 'site_id': int(site_id), + 'site_name': site_name, + 'total_visits': len(days), + 'visit_days': ','.join(map(str, days_sorted)), + 'first_visit': days_sorted[0], + 'last_visit': days_sorted[-1] + }) + df_site_schedule = pd.DataFrame(site_schedule) + df_site_schedule.to_excel(writer, sheet_name='site_visits', index=False) + + # Sheet 4: 汇总统计 + summary = pd.DataFrame({ + 'metric': ['total_days', 'total_events', 'single_events', 'dual_events', + 'avg_day_diff', 'max_day_diff', 'avg_gap', 'min_gap', 'max_gap'], + 'value': [DAYS_PER_YEAR, len(df_events), + (df_events['event_type'] == 'single').sum(), + (df_events['event_type'] == 'dual').sum(), + df_events_sorted['day_diff'].mean(), + df_events_sorted['day_diff'].max(), + np.mean(gaps) if gaps else 0, + min(gaps) if gaps else 0, + max(gaps) if gaps else 0] + }) + summary.to_excel(writer, sheet_name='summary', index=False) + +print(f"\n结果已保存至: {OUTPUT_FILE}") +print(" - Sheet 'calendar': 365天日历视图") +print(" - Sheet 'events': 事件详情") +print(" - Sheet 'site_visits': 站点访问日期汇总") +print(" - Sheet 'summary': 汇总统计") +print("\n" + "=" * 60) diff --git a/task3/05_calendar.xlsx b/task3/05_calendar.xlsx new file mode 100644 index 0000000..ca57067 Binary files /dev/null and b/task3/05_calendar.xlsx differ diff --git a/task3/06_evaluate.py b/task3/06_evaluate.py new file mode 100644 index 0000000..1caffb3 --- /dev/null +++ b/task3/06_evaluate.py @@ -0,0 +1,348 @@ +""" +Task 3 - Step 6: 效果评估 +========================= + +输入: + - 04_reschedule.xlsx (访问次数) + - 03_allocation.xlsx (配对分配方案) + - ../task1/03_allocate.xlsx (Task 1结果,用于对比) + - ../task1/04_metrics.xlsx (Task 1指标,用于对比) + +输出: 06_evaluate.xlsx (评估指标对比) + +评估指标: +- E1': 期望总服务量 +- E2': 质量加权服务量(总量计算衰减) +- F1': 满足率Gini系数 +- F2': 最低满足率 +- R1: 服务缺口风险 +- RS: 资源节省率 + +有效性衰减公式(总量计算): +q(μ_i + μ_j) = min(1, 250 / (μ_i + μ_j)) +""" + +import pandas as pd +import numpy as np +from scipy import stats + +# ============================================ +# 参数设置 +# ============================================ +RESCHEDULE_FILE = '04_reschedule.xlsx' +ALLOCATION_FILE = '03_allocation.xlsx' +TASK1_ALLOC_FILE = '../task1/03_allocate.xlsx' +TASK1_METRIC_FILE = '../task1/04_metrics.xlsx' +OUTPUT_FILE = '06_evaluate.xlsx' + +Q = 400 # 卡车容量 +QUALITY_THRESHOLD = 250 # 质量衰减阈值 +SHORTFALL_THRESHOLD = 0.8 # 服务缺口阈值 + +# ============================================ +# 辅助函数 +# ============================================ +def quality_factor(mu_total): + """质量折扣因子(总量计算)""" + return min(1.0, QUALITY_THRESHOLD / mu_total) if mu_total > 0 else 1.0 + + +def expected_service(q, mu, sigma): + """E[min(D, q)]""" + if sigma == 0: + return min(mu, q) + z = (q - mu) / sigma + return mu * stats.norm.cdf(z) - sigma * stats.norm.pdf(z) + q * (1 - stats.norm.cdf(z)) + + +def gini_coefficient(values): + """计算Gini系数""" + values = np.array(values) + values = values[~np.isnan(values)] + if len(values) == 0: + return 0 + values = np.sort(values) + n = len(values) + cumsum = np.cumsum(values) + return (2 * np.sum((np.arange(1, n + 1) * values)) - (n + 1) * cumsum[-1]) / (n * cumsum[-1]) if cumsum[-1] > 0 else 0 + + +def shortfall_probability(q, mu, sigma, threshold=0.8): + """P(S/D < threshold),即服务不足的概率""" + # 简化计算:P(min(D,q) / D < 0.8) ≈ P(D > q/0.8) + if sigma == 0: + return 0 if q >= mu * threshold else 1 + critical_demand = q / threshold + return 1 - stats.norm.cdf((critical_demand - mu) / sigma) + + +# ============================================ +# 读取数据 +# ============================================ +print("=" * 60) +print("Task 3 - Step 6: 效果评估") +print("=" * 60) + +# Task 3 数据 +df_sites = pd.read_excel(RESCHEDULE_FILE, sheet_name='sites_schedule') +df_pairs = pd.read_excel(ALLOCATION_FILE, sheet_name='allocation') +df_pair_visits = pd.read_excel(RESCHEDULE_FILE, sheet_name='pair_visits') + +print(f"\n读取Task 3数据:") +print(f" - 站点数: {len(df_sites)}") +print(f" - 配对数: {len(df_pairs)}") + +# Task 1 数据(用于对比) +df_task1 = pd.read_excel(TASK1_ALLOC_FILE) +try: + df_task1_metrics = pd.read_excel(TASK1_METRIC_FILE) + has_task1_metrics = True +except: + has_task1_metrics = False + print(" - 未找到Task 1指标文件,将重新计算") + +# ============================================ +# 计算 Task 3 指标 +# ============================================ +print(f"\n" + "-" * 40) +print("计算 Task 3 指标") +print("-" * 40) + +# 合并配对访问信息 +pair_k = {(row['site_i_id'], row['site_j_id']): row['k_dual'] + for _, row in df_pair_visits.iterrows()} + +# === E1': 期望总服务量 === +E1_prime = 0 + +# 单站点贡献 +for _, row in df_sites.iterrows(): + k_single = row['k_single_final'] + mu = row['mu'] + E1_prime += k_single * mu + +# 双站点贡献 +for _, row in df_pairs.iterrows(): + pair_key = (row['site_i_id'], row['site_j_id']) + k_dual = pair_k.get(pair_key, 0) + E_total = row['E_total'] + E1_prime += k_dual * E_total + +print(f"E1' (期望总服务量): {E1_prime:.0f}") + +# === E2': 质量加权服务量 === +E2_prime = 0 + +# 单站点贡献 +for _, row in df_sites.iterrows(): + k_single = row['k_single_final'] + mu = row['mu'] + q_factor = quality_factor(mu) + E2_prime += k_single * q_factor * mu + +# 双站点贡献(总量计算衰减) +for _, row in df_pairs.iterrows(): + pair_key = (row['site_i_id'], row['site_j_id']) + k_dual = pair_k.get(pair_key, 0) + mu_sum = row['mu_i'] + row['mu_j'] + E_total = row['E_total'] + q_factor = quality_factor(mu_sum) # 总量计算 + E2_prime += k_dual * q_factor * E_total + +print(f"E2' (质量加权服务量): {E2_prime:.0f}") + +# === 满足率计算 === +# Task 1定义: r_i = k_i * μ_i / μ̃_i (等效服务次数) +# Task 3: 需要分别计算单站点和双站点的贡献 + +site_satisfaction = {} + +for _, row in df_sites.iterrows(): + site_id = row['site_id'] + mu = row['mu'] + mu_tilde = row['mu_tilde'] + k_single = row['k_single_final'] + + # 单站点贡献: k_single * μ / μ̃ + r_single = k_single * mu / mu_tilde if mu_tilde > 0 else 0 + site_satisfaction[site_id] = r_single + +# 加上双站点贡献 +for _, row in df_pairs.iterrows(): + pair_key = (row['site_i_id'], row['site_j_id']) + k_dual = pair_k.get(pair_key, 0) + + # 双站点贡献: k_dual * E[S_i] / μ̃_i + mu_tilde_i = df_sites[df_sites['site_id'] == row['site_i_id']]['mu_tilde'].values[0] + mu_tilde_j = df_sites[df_sites['site_id'] == row['site_j_id']]['mu_tilde'].values[0] + + r_dual_i = k_dual * row['E_Si'] / mu_tilde_i if mu_tilde_i > 0 else 0 + r_dual_j = k_dual * row['E_Sj'] / mu_tilde_j if mu_tilde_j > 0 else 0 + + site_satisfaction[row['site_i_id']] += r_dual_i + site_satisfaction[row['site_j_id']] += r_dual_j + +# 计算满足率列表 +satisfaction_rates = list(site_satisfaction.values()) + +# === F1': Gini系数 === +F1_prime = gini_coefficient(satisfaction_rates) +print(f"F1' (满足率Gini): {F1_prime:.4f}") + +# === F2': 最低满足率 === +F2_prime = min(satisfaction_rates) if satisfaction_rates else 0 +print(f"F2' (最低满足率): {F2_prime:.4f}") + +# === R1: 服务缺口风险 === +# 双站点访问时,任一站点服务不足的概率 +shortfall_probs = [] +for _, row in df_pairs.iterrows(): + q_final = row['q_final'] + mu_i, sigma_i = row['mu_i'], row['sigma_i'] + mu_j, sigma_j = row['mu_j'], row['sigma_j'] + + # 站点i的缺口概率 + p_i = shortfall_probability(q_final, mu_i, sigma_i, SHORTFALL_THRESHOLD) + # 站点j的缺口概率 + p_j = shortfall_probability(Q - q_final, mu_j, sigma_j, SHORTFALL_THRESHOLD) + + # 至少一个站点缺口的概率 + p_either = 1 - (1 - p_i) * (1 - p_j) + shortfall_probs.append(p_either) + +R1 = np.mean(shortfall_probs) if shortfall_probs else 0 +print(f"R1 (平均缺口风险): {R1:.4f}") + +# === RS: 资源节省率 === +total_dual = df_pair_visits['k_dual'].sum() +RS = total_dual / 730 * 100 +print(f"RS (资源节省率): {RS:.2f}%") + +# ============================================ +# 计算 Task 1 指标(用于对比) +# ============================================ +print(f"\n" + "-" * 40) +print("计算 Task 1 指标(对比基准)") +print("-" * 40) + +# E1: 总服务量 +E1_task1 = (df_task1['k'] * df_task1['mu']).sum() +print(f"E1 (Task 1 总服务量): {E1_task1:.0f}") + +# E2: 质量加权 +E2_task1 = 0 +for _, row in df_task1.iterrows(): + q_factor = quality_factor(row['mu']) + E2_task1 += row['k'] * q_factor * row['mu'] +print(f"E2 (Task 1 质量加权): {E2_task1:.0f}") + +# F1: Gini系数 - 使用与Task 3相同的r定义 +# r_i = k_i * μ_i / μ̃_i +task1_rates = [] +for _, row in df_task1.iterrows(): + r = row['k'] * row['mu'] / row['mu_tilde'] if row['mu_tilde'] > 0 else 0 + task1_rates.append(r) +F1_task1 = gini_coefficient(task1_rates) +print(f"F1 (Task 1 Gini): {F1_task1:.4f}") + +# F2: 最低满足率 +F2_task1 = min(task1_rates) if task1_rates else 0 +print(f"F2 (Task 1 最低满足率): {F2_task1:.4f}") + +# ============================================ +# 对比分析 +# ============================================ +print(f"\n" + "-" * 40) +print("Task 3 vs Task 1 对比") +print("-" * 40) + +comparison = pd.DataFrame({ + 'Metric': ['E1 (总服务量)', 'E2 (质量加权)', 'F1 (Gini系数)', 'F2 (最低满足率)'], + 'Task 1': [E1_task1, E2_task1, F1_task1, F2_task1], + 'Task 3': [E1_prime, E2_prime, F1_prime, F2_prime], + 'Change': [E1_prime - E1_task1, E2_prime - E2_task1, + F1_prime - F1_task1, F2_prime - F2_task1], + 'Change %': [(E1_prime - E1_task1) / E1_task1 * 100, + (E2_prime - E2_task1) / E2_task1 * 100, + (F1_prime - F1_task1) / F1_task1 * 100 if F1_task1 != 0 else 0, + (F2_prime - F2_task1) / F2_task1 * 100 if F2_task1 != 0 else 0] +}) + +print(comparison.to_string(index=False)) + +# ============================================ +# 保存结果 +# ============================================ +with pd.ExcelWriter(OUTPUT_FILE, engine='openpyxl') as writer: + # Sheet 1: 指标对比 + comparison.to_excel(writer, sheet_name='comparison', index=False) + + # Sheet 2: Task 3 详细指标 + task3_metrics = pd.DataFrame({ + 'metric': ['E1_prime', 'E2_prime', 'F1_prime', 'F2_prime', + 'R1', 'RS', 'total_pairs', 'total_dual_visits'], + 'value': [E1_prime, E2_prime, F1_prime, F2_prime, + R1, RS/100, len(df_pairs), total_dual], + 'description': ['期望总服务量', '质量加权服务量', '满足率Gini系数', '最低满足率', + '平均缺口风险', '资源节省率', '配对数', '双站点访问次数'] + }) + task3_metrics.to_excel(writer, sheet_name='task3_metrics', index=False) + + # Sheet 3: 站点级别满足率 + site_rates = [] + for site_id, r in site_satisfaction.items(): + site_row = df_sites[df_sites['site_id'] == site_id] + if len(site_row) > 0: + site_name = site_row['site_name'].values[0] + mu = site_row['mu'].values[0] + mu_tilde = site_row['mu_tilde'].values[0] + k_single = site_row['k_single_final'].values[0] + k_dual = site_row['k_dual'].values[0] + else: + site_name = f"Site_{site_id}" + mu, mu_tilde, k_single, k_dual = 0, 0, 0, 0 + + site_rates.append({ + 'site_id': site_id, + 'site_name': site_name, + 'mu': mu, + 'mu_tilde': mu_tilde, + 'k_single': k_single, + 'k_dual': k_dual, + 'satisfaction_rate_r': r + }) + df_site_rates = pd.DataFrame(site_rates) + df_site_rates = df_site_rates.sort_values('satisfaction_rate_r') + df_site_rates.to_excel(writer, sheet_name='site_satisfaction', index=False) + + # Sheet 4: 配对风险分析 + pair_risk = [] + for idx, row in df_pairs.iterrows(): + pair_key = (row['site_i_id'], row['site_j_id']) + k_dual = pair_k.get(pair_key, 0) + q_final = row['q_final'] + mu_i, sigma_i = row['mu_i'], row['sigma_i'] + mu_j, sigma_j = row['mu_j'], row['sigma_j'] + + p_i = shortfall_probability(q_final, mu_i, sigma_i, SHORTFALL_THRESHOLD) + p_j = shortfall_probability(Q - q_final, mu_j, sigma_j, SHORTFALL_THRESHOLD) + + pair_risk.append({ + 'site_i_name': row['site_i_name'], + 'site_j_name': row['site_j_name'], + 'k_dual': k_dual, + 'q_final': q_final, + 'shortfall_prob_i': p_i, + 'shortfall_prob_j': p_j, + 'shortfall_prob_either': 1 - (1 - p_i) * (1 - p_j) + }) + df_pair_risk = pd.DataFrame(pair_risk) + df_pair_risk = df_pair_risk.sort_values('shortfall_prob_either', ascending=False) + df_pair_risk.to_excel(writer, sheet_name='pair_risk', index=False) + +print(f"\n结果已保存至: {OUTPUT_FILE}") +print(" - Sheet 'comparison': Task 1 vs Task 3 对比") +print(" - Sheet 'task3_metrics': Task 3 详细指标") +print(" - Sheet 'site_satisfaction': 站点满足率") +print(" - Sheet 'pair_risk': 配对风险分析") +print("\n" + "=" * 60) diff --git a/task3/06_evaluate.xlsx b/task3/06_evaluate.xlsx new file mode 100644 index 0000000..9528082 Binary files /dev/null and b/task3/06_evaluate.xlsx differ diff --git a/task3/README.md b/task3/README.md index 6f63f7b..89a0351 100644 --- a/task3/README.md +++ b/task3/README.md @@ -663,3 +663,80 @@ flowchart TB | 未满足惩罚 | $\lambda$ | 0.5 | 敏感性分析 | | 最大缺口惩罚 | $\eta$ | 0.3 | 敏感性分析 | | 鲁棒性水平 | $k$ | 1 | 84%保护 | +| **合并比例** | $r_{merge}$ | 1/2 | 保留50%独立访问 | + +--- + +## 附录D:实现决策记录 + +### D.1 关键设计决策 + +| 决策项 | 选择 | 理由 | +|--------|------|------| +| 数据来源 | Task 1结果 (`task1/03_allocate.xlsx`) | 复用已验证的需求修正和频次分配 | +| 有效性衰减计算 | **总量计算** $q(\mu_i + \mu_j)$ | 双站点共享同一卡车,总负载决定服务质量 | +| 合并比例 | $k_{max} = \lfloor \min(k_i, k_j) / 2 \rfloor$ | 保留50%独立访问,平衡效率与风险 | +| 配对策略 | 每站点最多配对一次 | 简化实现,足以说明方法论 | +| 双站点访问计数 | 算1次访问事件 | 释放槽位给其他站点 | + +### D.2 有效性衰减公式(总量计算) + +双站点访问时,质量折扣因子按总服务量计算: + +$$q_{ij} = \min\left(1, \frac{250}{\mu_i + \mu_j}\right)$$ + +**E2'的计算**: +$$E_2' = \sum_{\text{单站点}} k_i \cdot q(\mu_i) \cdot \mu_i + \sum_{\text{双站点}} k_{ij} \cdot q(\mu_i + \mu_j) \cdot E[S_i + S_j]$$ + +### D.3 合并比例详述 + +对于配对 $(i, j)$,原频次为 $k_i, k_j$: + +| 项目 | 公式 | +|------|------| +| 双站点访问次数 | $k_{ij} = \lfloor \min(k_i, k_j) / 2 \rfloor$ | +| 站点i剩余单独访问 | $k_i' = k_i - k_{ij}$ | +| 站点j剩余单独访问 | $k_j' = k_j - k_{ij}$ | +| 释放的访问槽位 | $\Delta N = \sum k_{ij}$ | + +--- + +## 附录E:敏感性分析计划 + +### E.1 待分析参数 + +| 参数 | 基准值 | 扫描范围 | 预期影响 | +|------|--------|---------|---------| +| **合并比例** $r_{merge}$ | 1/2 | [1/3, 1/2, 2/3] | 配对数量、资源节省 | +| 距离阈值 $l_{max}$ | 50 mi | [30, 40, 50, 60, 70] | 可行配对数 | +| 容量上限 $\mu_i + \mu_j$ | 450 | [400, 425, 450, 475, 500] | 配对选择范围 | +| CV阈值 | 0.5 | [0.3, 0.4, 0.5, 0.6] | 配对稳定性 | + +### E.2 敏感性分析输出 + +- 各参数对 E1', E2', F1', R1 的影响曲线 +- 参数交互效应热力图 +- 稳健性结论 + +--- + +## 附录F:程序流水线 + +``` +task3/ +├── 01_distance.py # 距离矩阵计算 +│ └── 01_distance.xlsx +├── 02_pairing.py # 配对筛选与选择 +│ └── 02_pairing.xlsx +├── 03_allocation.py # 最优分配计算 +│ └── 03_allocation.xlsx +├── 04_reschedule.py # 访问次数重分配 +│ └── 04_reschedule.xlsx +├── 05_calendar.py # 日历排程生成 +│ └── 05_calendar.xlsx +├── 06_evaluate.py # 效果评估 +│ └── 06_evaluate.xlsx +├── 07_sensitivity.py # 敏感性分析(待实现) +│ └── 07_sensitivity.xlsx +└── figures/ # 可视化输出 +```