Files
mcm-mfp/task3/04_reschedule.py
2026-01-19 11:57:19 +08:00

238 lines
8.1 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Task 3 - Step 4: 访问次数重分配
================================
输入:
- 03_allocation.xlsx (配对及分配方案)
- ../task1/03_allocate.xlsx (原始频次分配)
输出: 04_reschedule.xlsx (重分配后的访问次数)
重分配逻辑:
1. 对于每个配对(i,j),计算双站点访问次数: k_ij = floor(min(k_i, k_j) / 2)
2. 更新单独访问次数: k'_i = k_i - k_ij, k'_j = k_j - k_ij
3. 释放的槽位: ΔN = Σ k_ij
4. 将ΔN按需求比例分配给所有站点
约束:
- 双站点访问算1次"访问事件"
- 每天仍然2次访问事件单站点或双站点
- 总访问事件数 = 730
"""
import pandas as pd
import numpy as np
# ============================================
# 参数设置
# ============================================
ALLOCATION_FILE = '03_allocation.xlsx'
TASK1_FILE = '../task1/03_allocate.xlsx'
OUTPUT_FILE = '04_reschedule.xlsx'
MERGE_RATIO = 0.5 # 合并比例: min(k_i, k_j) * ratio
TOTAL_EVENTS = 730 # 年度总访问事件数
# ============================================
# 读取数据
# ============================================
print("=" * 60)
print("Task 3 - Step 4: 访问次数重分配")
print("=" * 60)
# 读取配对数据
df_pairs = pd.read_excel(ALLOCATION_FILE, sheet_name='allocation')
print(f"\n读取配对数据: {len(df_pairs)}")
# 读取原始分配
df_original = pd.read_excel(TASK1_FILE)
print(f"读取原始分配: {len(df_original)} 个站点")
print(f"原始总访问次数: {df_original['k'].sum()}")
# ============================================
# 计算双站点访问次数
# ============================================
print(f"\n" + "-" * 40)
print("计算双站点访问次数")
print("-" * 40)
# 创建站点访问信息的副本
sites = df_original[['site_id', 'site_name', 'mu', 'sigma', 'mu_tilde', 'k']].copy()
sites['k_original'] = sites['k']
sites['k_single'] = sites['k'] # 将被更新
sites['k_dual'] = 0 # 作为配对中的一员参与双站点访问的次数
sites['is_paired'] = False
sites['pair_partner'] = None
# 记录配对信息
pair_visits = []
paired_sites = set()
for idx, row in df_pairs.iterrows():
site_i = row['site_i_id']
site_j = row['site_j_id']
k_i = row['k_i']
k_j = row['k_j']
# 计算双站点访问次数
k_ij = int(min(k_i, k_j) * MERGE_RATIO)
# 确保至少保留1次单独访问
if k_ij >= min(k_i, k_j):
k_ij = min(k_i, k_j) - 1
if k_ij < 1:
k_ij = 0 # 如果无法合并,跳过
# 更新单独访问次数
k_i_single = k_i - k_ij
k_j_single = k_j - k_ij
# 更新sites表
sites.loc[sites['site_id'] == site_i, 'k_single'] = k_i_single
sites.loc[sites['site_id'] == site_i, 'k_dual'] = k_ij
sites.loc[sites['site_id'] == site_i, 'is_paired'] = True
sites.loc[sites['site_id'] == site_i, 'pair_partner'] = site_j
sites.loc[sites['site_id'] == site_j, 'k_single'] = k_j_single
sites.loc[sites['site_id'] == site_j, 'k_dual'] = k_ij
sites.loc[sites['site_id'] == site_j, 'is_paired'] = True
sites.loc[sites['site_id'] == site_j, 'pair_partner'] = site_i
paired_sites.add(site_i)
paired_sites.add(site_j)
pair_visits.append({
'pair_id': idx + 1,
'site_i_id': site_i,
'site_j_id': site_j,
'site_i_name': row['site_i_name'],
'site_j_name': row['site_j_name'],
'k_i_original': k_i,
'k_j_original': k_j,
'k_dual': k_ij,
'k_i_single': k_i_single,
'k_j_single': k_j_single,
'q_final': row['q_final'],
'E_total': row['E_total']
})
df_pair_visits = pd.DataFrame(pair_visits)
# ============================================
# 计算释放的槽位和重分配
# ============================================
print(f"\n" + "-" * 40)
print("计算释放槽位")
print("-" * 40)
# 当前访问事件统计
total_single = sites['k_single'].sum()
total_dual = df_pair_visits['k_dual'].sum()
total_events_current = total_single + total_dual
print(f"单站点访问次数: {total_single}")
print(f"双站点访问次数: {total_dual}")
print(f"当前总访问事件: {total_events_current}")
# 需要填补的槽位
delta_N = TOTAL_EVENTS - total_events_current
print(f"需要填补的槽位: {delta_N}")
# 按需求比例重分配
if delta_N > 0:
print(f"\n重分配 {delta_N} 次额外访问...")
# 计算每个站点的需求权重
total_demand = sites['mu_tilde'].sum()
sites['demand_weight'] = sites['mu_tilde'] / total_demand
# 按比例分配使用Hamilton方法
sites['k_extra_float'] = delta_N * sites['demand_weight']
sites['k_extra'] = sites['k_extra_float'].apply(np.floor).astype(int)
# 处理余数
remainder = delta_N - sites['k_extra'].sum()
if remainder > 0:
# 按小数部分降序分配余数
fractional = sites['k_extra_float'] - sites['k_extra']
top_indices = fractional.nlargest(int(remainder)).index
sites.loc[top_indices, 'k_extra'] += 1
# 更新最终单站点访问次数
sites['k_single_final'] = sites['k_single'] + sites['k_extra']
else:
sites['k_extra'] = 0
sites['k_single_final'] = sites['k_single']
# ============================================
# 验证和统计
# ============================================
print(f"\n" + "-" * 40)
print("验证和统计")
print("-" * 40)
# 最终统计
final_single = sites['k_single_final'].sum()
final_dual = df_pair_visits['k_dual'].sum()
final_total = final_single + final_dual
print(f"最终单站点访问: {final_single}")
print(f"最终双站点访问: {final_dual}")
print(f"最终总访问事件: {final_total}")
print(f"目标访问事件: {TOTAL_EVENTS}")
print(f"差异: {final_total - TOTAL_EVENTS}")
# 站点级别统计
print(f"\n站点访问次数变化:")
sites['k_total_final'] = sites['k_single_final'] + sites['k_dual']
print(f" - 原始k范围: [{sites['k_original'].min()}, {sites['k_original'].max()}]")
print(f" - 最终k范围: [{sites['k_total_final'].min()}, {sites['k_total_final'].max()}]")
print(f" - 额外分配范围: [{sites['k_extra'].min()}, {sites['k_extra'].max()}]")
# 配对统计
if len(df_pair_visits) > 0:
print(f"\n配对访问统计:")
print(f" - 配对数: {len(df_pair_visits)}")
print(f" - 双站点访问总次数: {df_pair_visits['k_dual'].sum()}")
print(f" - 每对双站点访问范围: [{df_pair_visits['k_dual'].min()}, {df_pair_visits['k_dual'].max()}]")
# ============================================
# 保存结果
# ============================================
with pd.ExcelWriter(OUTPUT_FILE, engine='openpyxl') as writer:
# Sheet 1: 站点最终访问次数
sites_output = sites[['site_id', 'site_name', 'mu', 'sigma', 'mu_tilde',
'k_original', 'k_single', 'k_extra', 'k_single_final',
'k_dual', 'k_total_final', 'is_paired', 'pair_partner']]
sites_output.to_excel(writer, sheet_name='sites_schedule', index=False)
# Sheet 2: 配对访问明细
df_pair_visits.to_excel(writer, sheet_name='pair_visits', index=False)
# Sheet 3: 参数记录
params = pd.DataFrame({
'parameter': ['MERGE_RATIO', 'TOTAL_EVENTS', 'delta_N'],
'value': [MERGE_RATIO, TOTAL_EVENTS, delta_N],
'description': ['合并比例', '年度总访问事件', '额外分配次数']
})
params.to_excel(writer, sheet_name='parameters', index=False)
# Sheet 4: 汇总统计
summary = pd.DataFrame({
'metric': ['total_sites', 'paired_sites', 'unpaired_sites',
'total_pairs', 'total_dual_visits', 'total_single_visits',
'total_events', 'original_total_visits'],
'value': [len(sites), len(paired_sites), len(sites) - len(paired_sites),
len(df_pair_visits), final_dual, final_single,
final_total, sites['k_original'].sum()]
})
summary.to_excel(writer, sheet_name='summary', index=False)
print(f"\n结果已保存至: {OUTPUT_FILE}")
print(" - Sheet 'sites_schedule': 站点最终访问次数")
print(" - Sheet 'pair_visits': 配对访问明细")
print(" - Sheet 'parameters': 参数记录")
print(" - Sheet 'summary': 汇总统计")
print("\n" + "=" * 60)