This commit is contained in:
2026-01-19 11:57:19 +08:00
parent c6e22729c7
commit ffc51b2927
13 changed files with 1542 additions and 0 deletions

237
task3/04_reschedule.py Normal file
View File

@@ -0,0 +1,237 @@
"""
Task 3 - Step 4: 访问次数重分配
================================
输入:
- 03_allocation.xlsx (配对及分配方案)
- ../task1/03_allocate.xlsx (原始频次分配)
输出: 04_reschedule.xlsx (重分配后的访问次数)
重分配逻辑:
1. 对于每个配对(i,j),计算双站点访问次数: k_ij = floor(min(k_i, k_j) / 2)
2. 更新单独访问次数: k'_i = k_i - k_ij, k'_j = k_j - k_ij
3. 释放的槽位: ΔN = Σ k_ij
4. 将ΔN按需求比例分配给所有站点
约束:
- 双站点访问算1次"访问事件"
- 每天仍然2次访问事件单站点或双站点
- 总访问事件数 = 730
"""
import pandas as pd
import numpy as np
# ============================================
# 参数设置
# ============================================
ALLOCATION_FILE = '03_allocation.xlsx'
TASK1_FILE = '../task1/03_allocate.xlsx'
OUTPUT_FILE = '04_reschedule.xlsx'
MERGE_RATIO = 0.5 # 合并比例: min(k_i, k_j) * ratio
TOTAL_EVENTS = 730 # 年度总访问事件数
# ============================================
# 读取数据
# ============================================
print("=" * 60)
print("Task 3 - Step 4: 访问次数重分配")
print("=" * 60)
# 读取配对数据
df_pairs = pd.read_excel(ALLOCATION_FILE, sheet_name='allocation')
print(f"\n读取配对数据: {len(df_pairs)}")
# 读取原始分配
df_original = pd.read_excel(TASK1_FILE)
print(f"读取原始分配: {len(df_original)} 个站点")
print(f"原始总访问次数: {df_original['k'].sum()}")
# ============================================
# 计算双站点访问次数
# ============================================
print(f"\n" + "-" * 40)
print("计算双站点访问次数")
print("-" * 40)
# 创建站点访问信息的副本
sites = df_original[['site_id', 'site_name', 'mu', 'sigma', 'mu_tilde', 'k']].copy()
sites['k_original'] = sites['k']
sites['k_single'] = sites['k'] # 将被更新
sites['k_dual'] = 0 # 作为配对中的一员参与双站点访问的次数
sites['is_paired'] = False
sites['pair_partner'] = None
# 记录配对信息
pair_visits = []
paired_sites = set()
for idx, row in df_pairs.iterrows():
site_i = row['site_i_id']
site_j = row['site_j_id']
k_i = row['k_i']
k_j = row['k_j']
# 计算双站点访问次数
k_ij = int(min(k_i, k_j) * MERGE_RATIO)
# 确保至少保留1次单独访问
if k_ij >= min(k_i, k_j):
k_ij = min(k_i, k_j) - 1
if k_ij < 1:
k_ij = 0 # 如果无法合并,跳过
# 更新单独访问次数
k_i_single = k_i - k_ij
k_j_single = k_j - k_ij
# 更新sites表
sites.loc[sites['site_id'] == site_i, 'k_single'] = k_i_single
sites.loc[sites['site_id'] == site_i, 'k_dual'] = k_ij
sites.loc[sites['site_id'] == site_i, 'is_paired'] = True
sites.loc[sites['site_id'] == site_i, 'pair_partner'] = site_j
sites.loc[sites['site_id'] == site_j, 'k_single'] = k_j_single
sites.loc[sites['site_id'] == site_j, 'k_dual'] = k_ij
sites.loc[sites['site_id'] == site_j, 'is_paired'] = True
sites.loc[sites['site_id'] == site_j, 'pair_partner'] = site_i
paired_sites.add(site_i)
paired_sites.add(site_j)
pair_visits.append({
'pair_id': idx + 1,
'site_i_id': site_i,
'site_j_id': site_j,
'site_i_name': row['site_i_name'],
'site_j_name': row['site_j_name'],
'k_i_original': k_i,
'k_j_original': k_j,
'k_dual': k_ij,
'k_i_single': k_i_single,
'k_j_single': k_j_single,
'q_final': row['q_final'],
'E_total': row['E_total']
})
df_pair_visits = pd.DataFrame(pair_visits)
# ============================================
# 计算释放的槽位和重分配
# ============================================
print(f"\n" + "-" * 40)
print("计算释放槽位")
print("-" * 40)
# 当前访问事件统计
total_single = sites['k_single'].sum()
total_dual = df_pair_visits['k_dual'].sum()
total_events_current = total_single + total_dual
print(f"单站点访问次数: {total_single}")
print(f"双站点访问次数: {total_dual}")
print(f"当前总访问事件: {total_events_current}")
# 需要填补的槽位
delta_N = TOTAL_EVENTS - total_events_current
print(f"需要填补的槽位: {delta_N}")
# 按需求比例重分配
if delta_N > 0:
print(f"\n重分配 {delta_N} 次额外访问...")
# 计算每个站点的需求权重
total_demand = sites['mu_tilde'].sum()
sites['demand_weight'] = sites['mu_tilde'] / total_demand
# 按比例分配使用Hamilton方法
sites['k_extra_float'] = delta_N * sites['demand_weight']
sites['k_extra'] = sites['k_extra_float'].apply(np.floor).astype(int)
# 处理余数
remainder = delta_N - sites['k_extra'].sum()
if remainder > 0:
# 按小数部分降序分配余数
fractional = sites['k_extra_float'] - sites['k_extra']
top_indices = fractional.nlargest(int(remainder)).index
sites.loc[top_indices, 'k_extra'] += 1
# 更新最终单站点访问次数
sites['k_single_final'] = sites['k_single'] + sites['k_extra']
else:
sites['k_extra'] = 0
sites['k_single_final'] = sites['k_single']
# ============================================
# 验证和统计
# ============================================
print(f"\n" + "-" * 40)
print("验证和统计")
print("-" * 40)
# 最终统计
final_single = sites['k_single_final'].sum()
final_dual = df_pair_visits['k_dual'].sum()
final_total = final_single + final_dual
print(f"最终单站点访问: {final_single}")
print(f"最终双站点访问: {final_dual}")
print(f"最终总访问事件: {final_total}")
print(f"目标访问事件: {TOTAL_EVENTS}")
print(f"差异: {final_total - TOTAL_EVENTS}")
# 站点级别统计
print(f"\n站点访问次数变化:")
sites['k_total_final'] = sites['k_single_final'] + sites['k_dual']
print(f" - 原始k范围: [{sites['k_original'].min()}, {sites['k_original'].max()}]")
print(f" - 最终k范围: [{sites['k_total_final'].min()}, {sites['k_total_final'].max()}]")
print(f" - 额外分配范围: [{sites['k_extra'].min()}, {sites['k_extra'].max()}]")
# 配对统计
if len(df_pair_visits) > 0:
print(f"\n配对访问统计:")
print(f" - 配对数: {len(df_pair_visits)}")
print(f" - 双站点访问总次数: {df_pair_visits['k_dual'].sum()}")
print(f" - 每对双站点访问范围: [{df_pair_visits['k_dual'].min()}, {df_pair_visits['k_dual'].max()}]")
# ============================================
# 保存结果
# ============================================
with pd.ExcelWriter(OUTPUT_FILE, engine='openpyxl') as writer:
# Sheet 1: 站点最终访问次数
sites_output = sites[['site_id', 'site_name', 'mu', 'sigma', 'mu_tilde',
'k_original', 'k_single', 'k_extra', 'k_single_final',
'k_dual', 'k_total_final', 'is_paired', 'pair_partner']]
sites_output.to_excel(writer, sheet_name='sites_schedule', index=False)
# Sheet 2: 配对访问明细
df_pair_visits.to_excel(writer, sheet_name='pair_visits', index=False)
# Sheet 3: 参数记录
params = pd.DataFrame({
'parameter': ['MERGE_RATIO', 'TOTAL_EVENTS', 'delta_N'],
'value': [MERGE_RATIO, TOTAL_EVENTS, delta_N],
'description': ['合并比例', '年度总访问事件', '额外分配次数']
})
params.to_excel(writer, sheet_name='parameters', index=False)
# Sheet 4: 汇总统计
summary = pd.DataFrame({
'metric': ['total_sites', 'paired_sites', 'unpaired_sites',
'total_pairs', 'total_dual_visits', 'total_single_visits',
'total_events', 'original_total_visits'],
'value': [len(sites), len(paired_sites), len(sites) - len(paired_sites),
len(df_pair_visits), final_dual, final_single,
final_total, sites['k_original'].sum()]
})
summary.to_excel(writer, sheet_name='summary', index=False)
print(f"\n结果已保存至: {OUTPUT_FILE}")
print(" - Sheet 'sites_schedule': 站点最终访问次数")
print(" - Sheet 'pair_visits': 配对访问明细")
print(" - Sheet 'parameters': 参数记录")
print(" - Sheet 'summary': 汇总统计")
print("\n" + "=" * 60)