""" Task 3 - Step 4: 访问次数重分配 ================================ 输入: - 03_allocation.xlsx (配对及分配方案) - ../task1/03_allocate.xlsx (原始频次分配) 输出: 04_reschedule.xlsx (重分配后的访问次数) 重分配逻辑: 1. 对于每个配对(i,j),计算双站点访问次数: k_ij = floor(min(k_i, k_j) / 2) 2. 更新单独访问次数: k'_i = k_i - k_ij, k'_j = k_j - k_ij 3. 释放的槽位: ΔN = Σ k_ij 4. 将ΔN按需求比例分配给所有站点 约束: - 双站点访问算1次"访问事件" - 每天仍然2次访问事件(单站点或双站点) - 总访问事件数 = 730 """ import pandas as pd import numpy as np # ============================================ # 参数设置 # ============================================ ALLOCATION_FILE = '03_allocation.xlsx' TASK1_FILE = '../task1/03_allocate.xlsx' OUTPUT_FILE = '04_reschedule.xlsx' MERGE_RATIO = 0.5 # 合并比例: min(k_i, k_j) * ratio TOTAL_EVENTS = 730 # 年度总访问事件数 # ============================================ # 读取数据 # ============================================ print("=" * 60) print("Task 3 - Step 4: 访问次数重分配") print("=" * 60) # 读取配对数据 df_pairs = pd.read_excel(ALLOCATION_FILE, sheet_name='allocation') print(f"\n读取配对数据: {len(df_pairs)} 对") # 读取原始分配 df_original = pd.read_excel(TASK1_FILE) print(f"读取原始分配: {len(df_original)} 个站点") print(f"原始总访问次数: {df_original['k'].sum()}") # ============================================ # 计算双站点访问次数 # ============================================ print(f"\n" + "-" * 40) print("计算双站点访问次数") print("-" * 40) # 创建站点访问信息的副本 sites = df_original[['site_id', 'site_name', 'mu', 'sigma', 'mu_tilde', 'k']].copy() sites['k_original'] = sites['k'] sites['k_single'] = sites['k'] # 将被更新 sites['k_dual'] = 0 # 作为配对中的一员参与双站点访问的次数 sites['is_paired'] = False sites['pair_partner'] = None # 记录配对信息 pair_visits = [] paired_sites = set() for idx, row in df_pairs.iterrows(): site_i = row['site_i_id'] site_j = row['site_j_id'] k_i = row['k_i'] k_j = row['k_j'] # 计算双站点访问次数 k_ij = int(min(k_i, k_j) * MERGE_RATIO) # 确保至少保留1次单独访问 if k_ij >= min(k_i, k_j): k_ij = min(k_i, k_j) - 1 if k_ij < 1: k_ij = 0 # 如果无法合并,跳过 # 更新单独访问次数 k_i_single = k_i - k_ij k_j_single = k_j - k_ij # 更新sites表 sites.loc[sites['site_id'] == site_i, 'k_single'] = k_i_single sites.loc[sites['site_id'] == site_i, 'k_dual'] = k_ij sites.loc[sites['site_id'] == site_i, 'is_paired'] = True sites.loc[sites['site_id'] == site_i, 'pair_partner'] = site_j sites.loc[sites['site_id'] == site_j, 'k_single'] = k_j_single sites.loc[sites['site_id'] == site_j, 'k_dual'] = k_ij sites.loc[sites['site_id'] == site_j, 'is_paired'] = True sites.loc[sites['site_id'] == site_j, 'pair_partner'] = site_i paired_sites.add(site_i) paired_sites.add(site_j) pair_visits.append({ 'pair_id': idx + 1, 'site_i_id': site_i, 'site_j_id': site_j, 'site_i_name': row['site_i_name'], 'site_j_name': row['site_j_name'], 'k_i_original': k_i, 'k_j_original': k_j, 'k_dual': k_ij, 'k_i_single': k_i_single, 'k_j_single': k_j_single, 'q_final': row['q_final'], 'E_total': row['E_total'] }) df_pair_visits = pd.DataFrame(pair_visits) # ============================================ # 计算释放的槽位和重分配 # ============================================ print(f"\n" + "-" * 40) print("计算释放槽位") print("-" * 40) # 当前访问事件统计 total_single = sites['k_single'].sum() total_dual = df_pair_visits['k_dual'].sum() total_events_current = total_single + total_dual print(f"单站点访问次数: {total_single}") print(f"双站点访问次数: {total_dual}") print(f"当前总访问事件: {total_events_current}") # 需要填补的槽位 delta_N = TOTAL_EVENTS - total_events_current print(f"需要填补的槽位: {delta_N}") # 按需求比例重分配 if delta_N > 0: print(f"\n重分配 {delta_N} 次额外访问...") # 计算每个站点的需求权重 total_demand = sites['mu_tilde'].sum() sites['demand_weight'] = sites['mu_tilde'] / total_demand # 按比例分配(使用Hamilton方法) sites['k_extra_float'] = delta_N * sites['demand_weight'] sites['k_extra'] = sites['k_extra_float'].apply(np.floor).astype(int) # 处理余数 remainder = delta_N - sites['k_extra'].sum() if remainder > 0: # 按小数部分降序分配余数 fractional = sites['k_extra_float'] - sites['k_extra'] top_indices = fractional.nlargest(int(remainder)).index sites.loc[top_indices, 'k_extra'] += 1 # 更新最终单站点访问次数 sites['k_single_final'] = sites['k_single'] + sites['k_extra'] else: sites['k_extra'] = 0 sites['k_single_final'] = sites['k_single'] # ============================================ # 验证和统计 # ============================================ print(f"\n" + "-" * 40) print("验证和统计") print("-" * 40) # 最终统计 final_single = sites['k_single_final'].sum() final_dual = df_pair_visits['k_dual'].sum() final_total = final_single + final_dual print(f"最终单站点访问: {final_single}") print(f"最终双站点访问: {final_dual}") print(f"最终总访问事件: {final_total}") print(f"目标访问事件: {TOTAL_EVENTS}") print(f"差异: {final_total - TOTAL_EVENTS}") # 站点级别统计 print(f"\n站点访问次数变化:") sites['k_total_final'] = sites['k_single_final'] + sites['k_dual'] print(f" - 原始k范围: [{sites['k_original'].min()}, {sites['k_original'].max()}]") print(f" - 最终k范围: [{sites['k_total_final'].min()}, {sites['k_total_final'].max()}]") print(f" - 额外分配范围: [{sites['k_extra'].min()}, {sites['k_extra'].max()}]") # 配对统计 if len(df_pair_visits) > 0: print(f"\n配对访问统计:") print(f" - 配对数: {len(df_pair_visits)}") print(f" - 双站点访问总次数: {df_pair_visits['k_dual'].sum()}") print(f" - 每对双站点访问范围: [{df_pair_visits['k_dual'].min()}, {df_pair_visits['k_dual'].max()}]") # ============================================ # 保存结果 # ============================================ with pd.ExcelWriter(OUTPUT_FILE, engine='openpyxl') as writer: # Sheet 1: 站点最终访问次数 sites_output = sites[['site_id', 'site_name', 'mu', 'sigma', 'mu_tilde', 'k_original', 'k_single', 'k_extra', 'k_single_final', 'k_dual', 'k_total_final', 'is_paired', 'pair_partner']] sites_output.to_excel(writer, sheet_name='sites_schedule', index=False) # Sheet 2: 配对访问明细 df_pair_visits.to_excel(writer, sheet_name='pair_visits', index=False) # Sheet 3: 参数记录 params = pd.DataFrame({ 'parameter': ['MERGE_RATIO', 'TOTAL_EVENTS', 'delta_N'], 'value': [MERGE_RATIO, TOTAL_EVENTS, delta_N], 'description': ['合并比例', '年度总访问事件', '额外分配次数'] }) params.to_excel(writer, sheet_name='parameters', index=False) # Sheet 4: 汇总统计 summary = pd.DataFrame({ 'metric': ['total_sites', 'paired_sites', 'unpaired_sites', 'total_pairs', 'total_dual_visits', 'total_single_visits', 'total_events', 'original_total_visits'], 'value': [len(sites), len(paired_sites), len(sites) - len(paired_sites), len(df_pair_visits), final_dual, final_single, final_total, sites['k_original'].sum()] }) summary.to_excel(writer, sheet_name='summary', index=False) print(f"\n结果已保存至: {OUTPUT_FILE}") print(" - Sheet 'sites_schedule': 站点最终访问次数") print(" - Sheet 'pair_visits': 配对访问明细") print(" - Sheet 'parameters': 参数记录") print(" - Sheet 'summary': 汇总统计") print("\n" + "=" * 60)