Files
mcm-mfp/task1/05_schedule.py

304 lines
9.8 KiB
Python
Raw Normal View History

2026-01-19 10:14:46 +08:00
"""
Step 05: 日历排程 - 贪心装箱算法
输入: 03_allocate.xlsx
输出: 05_schedule.xlsx
功能:
1. 将年度频次 k_i 转化为具体日期
2. 保证每天恰好2个站点
3. 优化访问间隔的均匀性
4. 输出完整的365天日历
约束:
- 每天恰好2个站点
- 每站点出现次数 = k_i
- 同一站点相邻访问间隔尽量均匀
"""
import pandas as pd
import numpy as np
from pathlib import Path
from collections import defaultdict
import random
# Path configuration: both workbooks live in the same directory as this script
INPUT_PATH = Path(__file__).parent / "03_allocate.xlsx"
OUTPUT_PATH = Path(__file__).parent / "05_schedule.xlsx"
# Scheduling parameters
T = 365  # number of days in the year
CAPACITY = 2  # number of sites per day
RANDOM_SEED = 42  # random seed (used by the local optimization)
def generate_ideal_dates(k: int, T: int = 365) -> list:
    """
    Return the ideal visit days for a site that is visited ``k`` times.

    The j-th visit (j = 0, 1, ..., k-1) is placed at
    ``round((j + 0.5) * T / k)``, which spreads the visits evenly over the
    horizon; every value is then clamped so it is at least 1 and, where
    possible, at most ``T``.

    Args:
        k: number of visits to place.
        T: number of days in the planning horizon.

    Returns:
        A list of ``k`` day numbers in ascending visit order.
    """
    return [
        max(1, min(T, round((visit + 0.5) * T / k)))
        for visit in range(k)
    ]
def greedy_schedule(site_visits: dict, T: int = 365, capacity: int = 2) -> dict:
    """
    Greedy bin-packing of visit events into calendar days.

    Every site contributes one event per planned visit, each tagged with its
    ideal day. Events are processed in order of (ideal day, site id); each
    one is placed on the closest day that still has room and does not
    already contain the same site. At equal distance the later day is tried
    before the earlier one.

    Args:
        site_visits: {site_id: k} yearly visit count per site.
        T: number of days in the planning horizon.
        capacity: maximum number of sites per day.

    Returns:
        calendar: {day: [site_id, ...]} for every day 1..T.
    """
    # Build all visit events as (ideal day, site id), ordered by ideal day
    events = sorted(
        (
            (ideal_day, site_id)
            for site_id, k in site_visits.items()
            for ideal_day in generate_ideal_dates(k, T)
        ),
        key=lambda event: (event[0], event[1]),
    )

    # Start from an empty calendar
    calendar = {day: [] for day in range(1, T + 1)}

    for ideal_day, site_id in events:
        # Widen the search radius around the ideal day until a slot is found
        for offset in range(T):
            slot = next(
                (
                    day
                    for day in (ideal_day + offset, ideal_day - offset)
                    if 1 <= day <= T
                    and len(calendar[day]) < capacity
                    and site_id not in calendar[day]
                ),
                None,
            )
            if slot is not None:
                calendar[slot].append(site_id)
                break
        else:
            # No day within the horizon could take this visit
            print(f"警告: 无法分配站点 {site_id} (理想日期 {ideal_day})")
    return calendar
def compute_gap_stats(calendar: dict, site_id: int) -> dict:
    """
    Summarize the gaps between consecutive visits of one site.

    Args:
        calendar: {day: [site_id, ...]} schedule.
        site_id: the site to analyze.

    Returns:
        A dict with ``n_visits``, the list of ``gaps`` (differences between
        consecutive visit days) and their mean, standard deviation, minimum,
        maximum and coefficient of variation. With fewer than two visits
        ``gaps`` is empty and every statistic is ``None``.
    """
    visit_days = sorted(day for day, sites in calendar.items() if site_id in sites)
    n_visits = len(visit_days)

    if n_visits >= 2:
        gaps = [later - earlier for earlier, later in zip(visit_days, visit_days[1:])]
        mean_gap = np.mean(gaps)
        std_gap = np.std(gaps)
        # Guard the division; the coefficient of variation falls back to 0
        cv = std_gap / mean_gap if mean_gap > 0 else 0
        return {
            'n_visits': n_visits,
            'gaps': gaps,
            'gap_mean': mean_gap,
            'gap_std': std_gap,
            'gap_min': min(gaps),
            'gap_max': max(gaps),
            'gap_cv': cv,
        }

    # Not enough visits to form a single gap
    summary = {'n_visits': n_visits, 'gaps': []}
    for stat_name in ('gap_mean', 'gap_std', 'gap_min', 'gap_max', 'gap_cv'):
        summary[stat_name] = None
    return summary
def local_optimization(calendar: dict, site_ids: list, max_iter: int = 5000, seed: int = 42) -> tuple:
    """
    Improve the evenness of visit gaps with a random-swap local search.

    Each iteration draws two distinct days from the calendar. If both days
    hold exactly two sites, one randomly chosen site from each day is
    exchanged, provided the exchange would not put the same site twice on
    one day. The swap is kept only when it strictly lowers the objective,
    otherwise it is undone.

    The objective is the sum, over ``site_ids``, of the variance of each
    site's gaps between consecutive visit days (``np.std(gaps) ** 2``).
    Sites with fewer than two visits contribute nothing.

    Args:
        calendar: {day: [site_id, ...]} schedule. It is copied, never
            modified in place.
        site_ids: sites whose gap variance is part of the objective.
        max_iter: number of swap attempts.
        seed: value passed to ``random.seed`` for reproducible runs.

    Returns:
        (calendar, improved): the optimized copy of the calendar and the
        number of accepted swaps.
    """
    random.seed(seed)
    # Copy each day's list so the caller's calendar is left untouched
    calendar = {day: list(sites) for day, sites in calendar.items()}

    # Draw days from the calendar itself instead of assuming days 1..365,
    # so schedules built with a different horizon do not raise KeyError.
    days = sorted(calendar)
    if len(days) < 2:
        return calendar, 0

    def gap_variance(day_set):
        """Variance of consecutive-visit gaps, or None with under two visits."""
        if len(day_set) < 2:
            return None
        ordered = sorted(day_set)
        gaps = [later - earlier for earlier, later in zip(ordered, ordered[1:])]
        return np.std(gaps) ** 2

    # Cache visit days and variance per tracked site. A swap only moves two
    # sites, so only their entries need refreshing on each attempt.
    visit_days = {site_id: set() for site_id in site_ids}
    for day, sites in calendar.items():
        for site_id in sites:
            if site_id in visit_days:
                visit_days[site_id].add(day)
    variances = {site_id: gap_variance(day_set) for site_id, day_set in visit_days.items()}

    def total_gap_variance():
        """Sum the cached variances in ``site_ids`` order."""
        # Explicit left-to-right accumulation keeps floating-point results
        # identical to summing freshly computed per-site variances.
        total_var = 0
        for site_id in site_ids:
            site_var = variances[site_id]
            if site_var is not None:
                total_var += site_var
        return total_var

    current_var = total_gap_variance()
    improved = 0
    for _ in range(max_iter):
        # Pick two distinct days at random
        t1, t2 = random.sample(days, 2)
        slots1, slots2 = calendar[t1], calendar[t2]
        # The swap logic below assumes two-slot days; skip anything else
        if len(slots1) != 2 or len(slots2) != 2:
            continue

        # Pick which slot of each day takes part in the exchange
        pos1, pos2 = random.randint(0, 1), random.randint(0, 1)
        s1, s2 = slots1[pos1], slots2[pos2]
        # Reject swaps that are no-ops or would duplicate a site within a day
        if s1 == s2 or s2 == slots1[1 - pos1] or s1 == slots2[1 - pos2]:
            continue

        # Tentatively apply the swap
        slots1[pos1], slots2[pos2] = s2, s1
        previous = {}
        for site_id in (s1, s2):
            if site_id not in visit_days:
                continue  # site is not part of the objective
            previous[site_id] = (visit_days[site_id], variances[site_id])
            still_visited = {day for day in (t1, t2) if site_id in calendar[day]}
            updated_days = (visit_days[site_id] - {t1, t2}) | still_visited
            visit_days[site_id] = updated_days
            variances[site_id] = gap_variance(updated_days)

        new_var = total_gap_variance()
        if new_var < current_var:
            current_var = new_var
            improved += 1
        else:
            # Undo the swap and restore the cached statistics
            slots1[pos1], slots2[pos2] = s1, s2
            for site_id, (old_days, old_var) in previous.items():
                visit_days[site_id] = old_days
                variances[site_id] = old_var
    return calendar, improved
def main():
    """
    Run the scheduling step end to end.

    Reads the allocation workbook at ``INPUT_PATH`` (columns ``site_id``,
    ``site_name`` and ``k`` are used), builds a greedy calendar, refines it
    with the local search, and writes four sheets to ``OUTPUT_PATH``:
    ``calendar``, ``site_dates``, ``gap_statistics`` and ``parameters``.
    Progress and summary figures are printed along the way.

    Returns:
        (df_calendar, df_gaps): the per-day calendar table and the per-site
        gap statistics table.
    """
    # Single source of truth for the local-search budget; it is passed to the
    # optimizer, logged, and recorded in the parameters sheet.
    max_iter = 5000
    banner = "=" * 60

    print(banner)
    print("Step 05: 日历排程 - 贪心装箱算法")
    print(banner)

    # 1. Load the allocation result
    print(f"\n[1] 读取输入: {INPUT_PATH}")
    df = pd.read_excel(INPUT_PATH)
    print(f" 读取 {len(df)} 条记录")

    # Build the {site_id: k} mapping
    site_visits = dict(zip(df['site_id'], df['k']))
    total_visits = sum(site_visits.values())
    print(f" 总访问次数: {total_visits}")
    print(f" 期望日历天数: {total_visits // CAPACITY}")

    # 2. Greedy scheduling
    print("\n[2] 执行贪心装箱排程...")
    calendar = greedy_schedule(site_visits, T, CAPACITY)

    # Sanity checks on the greedy result
    total_assigned = sum(len(sites) for sites in calendar.values())
    print(f" 已分配访问事件: {total_assigned} / {total_visits}")
    # Classify days against CAPACITY rather than a literal 2 so the figures
    # stay correct if the per-day capacity constant is changed.
    empty_days = sum(1 for sites in calendar.values() if len(sites) == 0)
    partial_days = sum(1 for sites in calendar.values() if 0 < len(sites) < CAPACITY)
    full_days = sum(1 for sites in calendar.values() if len(sites) == CAPACITY)
    print(f" 日历统计: {full_days} 满载 + {partial_days} 部分 + {empty_days} 空闲")

    # 3. Local optimization
    print("\n[3] 局部优化 (改善间隔均匀性)...")
    site_ids = list(site_visits.keys())
    calendar_opt, n_improved = local_optimization(
        calendar, site_ids, max_iter=max_iter, seed=RANDOM_SEED
    )
    print(f" 优化迭代: {max_iter} 次")
    print(f" 接受的改进: {n_improved}")

    # 4. Gap statistics per site
    print("\n[4] 访问间隔统计")
    gap_stats_list = []
    for site_id in site_ids:
        stats = compute_gap_stats(calendar_opt, site_id)
        stats['site_id'] = site_id
        gap_stats_list.append(stats)
    df_gaps = pd.DataFrame(gap_stats_list)
    df_gaps = df_gaps.merge(df[['site_id', 'site_name', 'k']], on='site_id')

    # Global summary over sites that have at least one gap
    valid_gaps = df_gaps[df_gaps['gap_mean'].notna()]
    print(f" 平均间隔均值: {valid_gaps['gap_mean'].mean():.2f}")
    print(f" 平均间隔标准差: {valid_gaps['gap_std'].mean():.2f}")
    print(f" 最大单次间隔: {valid_gaps['gap_max'].max():.0f}")
    print(f" 平均间隔CV: {valid_gaps['gap_cv'].mean():.4f}")

    # 5. Build the calendar table: day, site_1, site_2
    print("\n[5] 生成日历输出...")
    calendar_rows = []
    for day in range(1, T + 1):
        sites = calendar_opt.get(day, [])
        site_1 = sites[0] if len(sites) > 0 else None
        site_2 = sites[1] if len(sites) > 1 else None
        calendar_rows.append({
            'day': day,
            'site_1_id': site_1,
            'site_2_id': site_2
        })
    df_calendar = pd.DataFrame(calendar_rows)

    # Attach site names
    site_name_map = dict(zip(df['site_id'], df['site_name']))
    df_calendar['site_1_name'] = df_calendar['site_1_id'].map(site_name_map)
    df_calendar['site_2_name'] = df_calendar['site_2_id'].map(site_name_map)

    # 6. Per-site list of scheduled days
    site_dates = []
    for site_id in site_ids:
        days = sorted([day for day, sites in calendar_opt.items() if site_id in sites])
        site_dates.append({
            'site_id': site_id,
            'site_name': site_name_map[site_id],
            'k': len(days),
            'dates': ','.join(map(str, days))
        })
    df_site_dates = pd.DataFrame(site_dates)

    # 7. Write the workbook
    print(f"\n[6] 保存输出: {OUTPUT_PATH}")
    with pd.ExcelWriter(OUTPUT_PATH, engine='openpyxl') as writer:
        # Sheet 1: calendar (one row per day)
        df_calendar.to_excel(writer, sheet_name='calendar', index=False)
        # Sheet 2: per-site day lists
        df_site_dates.to_excel(writer, sheet_name='site_dates', index=False)
        # Sheet 3: gap statistics
        df_gaps_out = df_gaps[['site_id', 'site_name', 'k', 'n_visits', 'gap_mean', 'gap_std', 'gap_min', 'gap_max', 'gap_cv']]
        df_gaps_out.to_excel(writer, sheet_name='gap_statistics', index=False)
        # Sheet 4: scheduling parameters
        params = pd.DataFrame([
            {'parameter': 'T (days)', 'value': T},
            {'parameter': 'CAPACITY (sites/day)', 'value': CAPACITY},
            {'parameter': 'total_visits', 'value': total_visits},
            {'parameter': 'optimization_iterations', 'value': max_iter},
            {'parameter': 'improvements_accepted', 'value': n_improved},
        ])
        params.to_excel(writer, sheet_name='parameters', index=False)
    print(" 已保存4个工作表: calendar, site_dates, gap_statistics, parameters")

    # 8. Preview
    print("\n[7] 日历预览 (前10天):")
    print(df_calendar.head(10).to_string(index=False))
    print("\n 间隔最大的5个站点:")
    top5_gap = df_gaps.nlargest(5, 'gap_max')[['site_id', 'site_name', 'k', 'gap_mean', 'gap_max', 'gap_cv']]
    print(top5_gap.to_string(index=False))

    print("\n" + banner)
    print("Step 05 完成")
    print(banner)
    return df_calendar, df_gaps
# Run the scheduling step only when this file is executed as a script
if __name__ == "__main__":
    main()