P3: main
This commit is contained in:
332
task3/05_calendar.py
Normal file
332
task3/05_calendar.py
Normal file
@@ -0,0 +1,332 @@
|
||||
"""
|
||||
Task 3 - Step 5: 日历排程生成
|
||||
=============================
|
||||
|
||||
输入:
|
||||
- 04_reschedule.xlsx (站点访问次数 + 配对访问明细)
|
||||
|
||||
输出: 05_calendar.xlsx (365天的完整排程)
|
||||
|
||||
排程逻辑:
|
||||
1. 生成所有访问事件(单站点 + 双站点)
|
||||
2. 为每个事件计算理想日期(均匀分布)
|
||||
3. 贪心分配到365天,每天2个事件槽位
|
||||
4. 优先安排双站点访问(时间较长)
|
||||
|
||||
约束:
|
||||
- 每天恰好2个访问事件
|
||||
- 同一站点的访问尽量均匀分布
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from collections import defaultdict
|
||||
|
||||
# ============================================
|
||||
# 参数设置
|
||||
# ============================================
|
||||
INPUT_FILE = '04_reschedule.xlsx'
|
||||
OUTPUT_FILE = '05_calendar.xlsx'
|
||||
|
||||
DAYS_PER_YEAR = 365
|
||||
EVENTS_PER_DAY = 2
|
||||
TOTAL_EVENTS = DAYS_PER_YEAR * EVENTS_PER_DAY # 730
|
||||
|
||||
# ============================================
|
||||
# 读取数据
|
||||
# ============================================
|
||||
print("=" * 60)
|
||||
print("Task 3 - Step 5: 日历排程生成")
|
||||
print("=" * 60)
|
||||
|
||||
# 读取站点访问次数
|
||||
df_sites = pd.read_excel(INPUT_FILE, sheet_name='sites_schedule')
|
||||
print(f"\n读取站点数据: {len(df_sites)} 个站点")
|
||||
|
||||
# 读取配对访问
|
||||
df_pairs = pd.read_excel(INPUT_FILE, sheet_name='pair_visits')
|
||||
print(f"读取配对数据: {len(df_pairs)} 对")
|
||||
|
||||
# ============================================
|
||||
# 生成访问事件
|
||||
# ============================================
|
||||
print(f"\n" + "-" * 40)
|
||||
print("生成访问事件")
|
||||
print("-" * 40)
|
||||
|
||||
events = []
|
||||
event_id = 0
|
||||
|
||||
# 1. 生成单站点访问事件
|
||||
for _, row in df_sites.iterrows():
|
||||
site_id = row['site_id']
|
||||
site_name = row['site_name']
|
||||
k_single = int(row['k_single_final'])
|
||||
|
||||
for visit_num in range(k_single):
|
||||
# 计算理想日期(均匀分布)
|
||||
ideal_day = (visit_num + 0.5) * DAYS_PER_YEAR / k_single
|
||||
|
||||
events.append({
|
||||
'event_id': event_id,
|
||||
'event_type': 'single',
|
||||
'site_i_id': site_id,
|
||||
'site_j_id': None,
|
||||
'site_i_name': site_name,
|
||||
'site_j_name': None,
|
||||
'visit_num': visit_num + 1,
|
||||
'total_visits': k_single,
|
||||
'ideal_day': ideal_day,
|
||||
'priority': 0 # 单站点优先级较低
|
||||
})
|
||||
event_id += 1
|
||||
|
||||
# 2. 生成双站点访问事件
|
||||
for _, row in df_pairs.iterrows():
|
||||
site_i_id = row['site_i_id']
|
||||
site_j_id = row['site_j_id']
|
||||
site_i_name = row['site_i_name']
|
||||
site_j_name = row['site_j_name']
|
||||
k_dual = int(row['k_dual'])
|
||||
|
||||
for visit_num in range(k_dual):
|
||||
ideal_day = (visit_num + 0.5) * DAYS_PER_YEAR / k_dual
|
||||
|
||||
events.append({
|
||||
'event_id': event_id,
|
||||
'event_type': 'dual',
|
||||
'site_i_id': site_i_id,
|
||||
'site_j_id': site_j_id,
|
||||
'site_i_name': site_i_name,
|
||||
'site_j_name': site_j_name,
|
||||
'visit_num': visit_num + 1,
|
||||
'total_visits': k_dual,
|
||||
'ideal_day': ideal_day,
|
||||
'priority': 1 # 双站点优先级较高
|
||||
})
|
||||
event_id += 1
|
||||
|
||||
df_events = pd.DataFrame(events)
|
||||
print(f"总访问事件数: {len(df_events)}")
|
||||
print(f" - 单站点: {(df_events['event_type'] == 'single').sum()}")
|
||||
print(f" - 双站点: {(df_events['event_type'] == 'dual').sum()}")
|
||||
|
||||
# ============================================
|
||||
# 贪心排程算法
|
||||
# ============================================
|
||||
print(f"\n" + "-" * 40)
|
||||
print("执行贪心排程")
|
||||
print("-" * 40)
|
||||
|
||||
# 按理想日期和优先级排序
|
||||
df_events_sorted = df_events.sort_values(
|
||||
['ideal_day', 'priority'],
|
||||
ascending=[True, False]
|
||||
).reset_index(drop=True)
|
||||
|
||||
# 初始化日历槽位
|
||||
calendar = {day: [] for day in range(1, DAYS_PER_YEAR + 1)}
|
||||
|
||||
# 记录每个站点最后访问的日期
|
||||
last_visit = defaultdict(lambda: -float('inf'))
|
||||
|
||||
# 贪心分配
|
||||
assigned_day = []
|
||||
|
||||
for idx, event in df_events_sorted.iterrows():
|
||||
ideal = event['ideal_day']
|
||||
site_i = event['site_i_id']
|
||||
site_j = event['site_j_id']
|
||||
|
||||
# 寻找最佳可用日期
|
||||
best_day = None
|
||||
best_score = float('inf')
|
||||
|
||||
# 搜索范围:理想日期附近
|
||||
search_start = max(1, int(ideal) - 30)
|
||||
search_end = min(DAYS_PER_YEAR, int(ideal) + 30)
|
||||
|
||||
for day in range(search_start, search_end + 1):
|
||||
# 检查槽位是否可用
|
||||
if len(calendar[day]) >= EVENTS_PER_DAY:
|
||||
continue
|
||||
|
||||
# 检查同一站点是否已在当天访问
|
||||
sites_on_day = set()
|
||||
for e in calendar[day]:
|
||||
sites_on_day.add(e['site_i_id'])
|
||||
if e['site_j_id'] is not None:
|
||||
sites_on_day.add(e['site_j_id'])
|
||||
|
||||
if site_i in sites_on_day:
|
||||
continue
|
||||
if site_j is not None and site_j in sites_on_day:
|
||||
continue
|
||||
|
||||
# 计算得分(理想日期偏差 + 间隔惩罚)
|
||||
day_diff = abs(day - ideal)
|
||||
|
||||
# 间隔惩罚:鼓励与上次访问保持距离
|
||||
min_gap = min(
|
||||
day - last_visit[site_i],
|
||||
day - last_visit[site_j] if site_j is not None else float('inf')
|
||||
)
|
||||
gap_penalty = max(0, 7 - min_gap) * 2 # 7天内再次访问有惩罚
|
||||
|
||||
score = day_diff + gap_penalty
|
||||
|
||||
if score < best_score:
|
||||
best_score = score
|
||||
best_day = day
|
||||
|
||||
# 如果附近没找到,扩大搜索范围
|
||||
if best_day is None:
|
||||
for day in range(1, DAYS_PER_YEAR + 1):
|
||||
if len(calendar[day]) < EVENTS_PER_DAY:
|
||||
sites_on_day = set()
|
||||
for e in calendar[day]:
|
||||
sites_on_day.add(e['site_i_id'])
|
||||
if e['site_j_id'] is not None:
|
||||
sites_on_day.add(e['site_j_id'])
|
||||
|
||||
if site_i not in sites_on_day:
|
||||
if site_j is None or site_j not in sites_on_day:
|
||||
best_day = day
|
||||
break
|
||||
|
||||
if best_day is None:
|
||||
print(f"警告: 无法分配事件 {event['event_id']}")
|
||||
best_day = 1 # 强制分配
|
||||
|
||||
# 分配到日历
|
||||
calendar[best_day].append(event.to_dict())
|
||||
assigned_day.append(best_day)
|
||||
|
||||
# 更新最后访问日期
|
||||
last_visit[site_i] = best_day
|
||||
if site_j is not None:
|
||||
last_visit[site_j] = best_day
|
||||
|
||||
df_events_sorted['assigned_day'] = assigned_day
|
||||
|
||||
# ============================================
|
||||
# 生成日历视图
|
||||
# ============================================
|
||||
print(f"\n" + "-" * 40)
|
||||
print("生成日历视图")
|
||||
print("-" * 40)
|
||||
|
||||
calendar_rows = []
|
||||
for day in range(1, DAYS_PER_YEAR + 1):
|
||||
events_on_day = calendar[day]
|
||||
|
||||
row = {'day': day}
|
||||
|
||||
for slot in range(EVENTS_PER_DAY):
|
||||
if slot < len(events_on_day):
|
||||
e = events_on_day[slot]
|
||||
row[f'slot_{slot+1}_type'] = e['event_type']
|
||||
row[f'slot_{slot+1}_site_i'] = e['site_i_id']
|
||||
row[f'slot_{slot+1}_site_j'] = e['site_j_id']
|
||||
row[f'slot_{slot+1}_name_i'] = e['site_i_name']
|
||||
row[f'slot_{slot+1}_name_j'] = e['site_j_name']
|
||||
else:
|
||||
row[f'slot_{slot+1}_type'] = None
|
||||
row[f'slot_{slot+1}_site_i'] = None
|
||||
row[f'slot_{slot+1}_site_j'] = None
|
||||
row[f'slot_{slot+1}_name_i'] = None
|
||||
row[f'slot_{slot+1}_name_j'] = None
|
||||
|
||||
calendar_rows.append(row)
|
||||
|
||||
df_calendar = pd.DataFrame(calendar_rows)
|
||||
|
||||
# ============================================
|
||||
# 统计和验证
|
||||
# ============================================
|
||||
print(f"\n排程统计:")
|
||||
|
||||
# 每天事件数
|
||||
events_per_day = [len(calendar[d]) for d in range(1, DAYS_PER_YEAR + 1)]
|
||||
print(f" - 每天事件数: min={min(events_per_day)}, max={max(events_per_day)}, avg={np.mean(events_per_day):.2f}")
|
||||
|
||||
# 理想日期偏差
|
||||
df_events_sorted['day_diff'] = abs(df_events_sorted['assigned_day'] - df_events_sorted['ideal_day'])
|
||||
print(f" - 理想日期偏差: avg={df_events_sorted['day_diff'].mean():.2f}, max={df_events_sorted['day_diff'].max():.0f}")
|
||||
|
||||
# 访问间隔分析
|
||||
print(f"\n访问间隔分析:")
|
||||
|
||||
site_visits = defaultdict(list)
|
||||
for _, event in df_events_sorted.iterrows():
|
||||
site_visits[event['site_i_id']].append(event['assigned_day'])
|
||||
# 只有双站点访问才有site_j_id
|
||||
if event['site_j_id'] is not None and not (isinstance(event['site_j_id'], float) and np.isnan(event['site_j_id'])):
|
||||
site_visits[event['site_j_id']].append(event['assigned_day'])
|
||||
|
||||
gaps = []
|
||||
for site_id, days in site_visits.items():
|
||||
days_sorted = sorted(days)
|
||||
for i in range(1, len(days_sorted)):
|
||||
gaps.append(days_sorted[i] - days_sorted[i-1])
|
||||
|
||||
if gaps:
|
||||
print(f" - 间隔范围: [{min(gaps)}, {max(gaps)}] 天")
|
||||
print(f" - 平均间隔: {np.mean(gaps):.1f} 天")
|
||||
print(f" - 中位数间隔: {np.median(gaps):.1f} 天")
|
||||
|
||||
# ============================================
|
||||
# 保存结果
|
||||
# ============================================
|
||||
with pd.ExcelWriter(OUTPUT_FILE, engine='openpyxl') as writer:
|
||||
# Sheet 1: 日历视图
|
||||
df_calendar.to_excel(writer, sheet_name='calendar', index=False)
|
||||
|
||||
# Sheet 2: 事件详情(含分配日期)
|
||||
df_events_sorted.to_excel(writer, sheet_name='events', index=False)
|
||||
|
||||
# Sheet 3: 站点访问日期汇总
|
||||
site_schedule = []
|
||||
for site_id, days in site_visits.items():
|
||||
# 跳过None/NaN值
|
||||
if site_id is None or (isinstance(site_id, float) and np.isnan(site_id)):
|
||||
continue
|
||||
# 处理可能的类型不匹配
|
||||
site_row = df_sites[df_sites['site_id'] == int(site_id)]
|
||||
if len(site_row) > 0:
|
||||
site_name = site_row['site_name'].values[0]
|
||||
else:
|
||||
site_name = f"Site_{site_id}"
|
||||
days_sorted = sorted(days)
|
||||
site_schedule.append({
|
||||
'site_id': int(site_id),
|
||||
'site_name': site_name,
|
||||
'total_visits': len(days),
|
||||
'visit_days': ','.join(map(str, days_sorted)),
|
||||
'first_visit': days_sorted[0],
|
||||
'last_visit': days_sorted[-1]
|
||||
})
|
||||
df_site_schedule = pd.DataFrame(site_schedule)
|
||||
df_site_schedule.to_excel(writer, sheet_name='site_visits', index=False)
|
||||
|
||||
# Sheet 4: 汇总统计
|
||||
summary = pd.DataFrame({
|
||||
'metric': ['total_days', 'total_events', 'single_events', 'dual_events',
|
||||
'avg_day_diff', 'max_day_diff', 'avg_gap', 'min_gap', 'max_gap'],
|
||||
'value': [DAYS_PER_YEAR, len(df_events),
|
||||
(df_events['event_type'] == 'single').sum(),
|
||||
(df_events['event_type'] == 'dual').sum(),
|
||||
df_events_sorted['day_diff'].mean(),
|
||||
df_events_sorted['day_diff'].max(),
|
||||
np.mean(gaps) if gaps else 0,
|
||||
min(gaps) if gaps else 0,
|
||||
max(gaps) if gaps else 0]
|
||||
})
|
||||
summary.to_excel(writer, sheet_name='summary', index=False)
|
||||
|
||||
print(f"\n结果已保存至: {OUTPUT_FILE}")
|
||||
print(" - Sheet 'calendar': 365天日历视图")
|
||||
print(" - Sheet 'events': 事件详情")
|
||||
print(" - Sheet 'site_visits': 站点访问日期汇总")
|
||||
print(" - Sheet 'summary': 汇总统计")
|
||||
print("\n" + "=" * 60)
|
||||
Reference in New Issue
Block a user