Files
mcm-mfp/task1/04_schedule_2021.py
2026-01-19 01:40:19 +08:00

220 lines
7.8 KiB
Python

from __future__ import annotations
import bisect
import datetime as dt
from dataclasses import dataclass
import numpy as np
import pandas as pd
# Input workbook; main() reads its "allocations" sheet.
ALLOC_XLSX = "task1/03_allocate.xlsx"
# Output workbook; main() writes the meta / calendar / site_dates /
# gap_metrics sheets into it.
OUTPUT_XLSX = "task1/04_schedule.xlsx"
# Calendar year used to convert day-of-year numbers into dates.
YEAR = 2021
# Length of the planning horizon in days (2021 is not a leap year).
DAYS = 365
SLOTS_PER_DAY = 2 # scenario B: 2 trucks, 2 distinct sites/day
# Default recommendation
# main() combines these two into the allocation column name
# "k_<DEFAULT_METHOD>_<DEFAULT_SCENARIO>".
DEFAULT_SCENARIO = "rho20"
DEFAULT_METHOD = "proportional_D"
@dataclass(frozen=True)
class Event:
    """One planned visit to a site, tagged with the day it ideally happens.

    Instances are immutable (``frozen=True``); the scheduler moves an event
    to another calendar day without changing its ``target_day``.
    """

    # Identifier of the site to be visited.
    site_id: int
    # Display name of the site, copied into the output sheets.
    site_name: str
    # Ideal day of the year for this visit (1-based).
    target_day: int # 1..365
def build_targets(
    site_id: int,
    site_name: str,
    k: int,
    *,
    days: int | None = None,
) -> list[Event]:
    """Return ``k`` evenly spaced target visits for a single site.

    The j-th visit (0-based) is centred in the j-th of ``k`` equal slices of
    the planning horizon, i.e. at ``(j + 0.5) * days / k``, rounded half-up
    to a whole day and clamped to ``1..days``.

    Args:
        site_id: Identifier copied onto every generated event.
        site_name: Display name copied onto every generated event.
        k: Number of visits to generate; ``k <= 0`` yields no events.
        days: Length of the planning horizon in days. Defaults to the
            module-level ``DAYS``.

    Returns:
        ``k`` events in non-decreasing ``target_day`` order.
    """
    if k <= 0:
        return []
    horizon = DAYS if days is None else days
    targets: list[Event] = []
    for j in range(k):
        # floor((j + 0.5) * horizon / k + 0.5), computed in exact integer
        # arithmetic. The built-in round() rounds halves to the nearest even
        # integer, which makes consecutive half-integer targets alternate
        # between rounding down and up and so produces uneven gaps.
        t = ((2 * j + 1) * horizon + k) // (2 * k)
        # Clamp in case k exceeds the number of days in the horizon.
        t = max(1, min(horizon, t))
        targets.append(Event(site_id=site_id, site_name=site_name, target_day=t))
    return targets
def _nearest_feasible_slot(
    free_days: list[int],
    target_day: int,
    site_id: int,
    day_to_events: dict[int, list[Event]],
) -> int | None:
    """Return the index in ``free_days`` of the feasible day closest to ``target_day``.

    ``free_days`` must be sorted ascending. A day is feasible when none of the
    events already placed on it belongs to ``site_id``. Candidates are examined
    in order of increasing distance from ``target_day``; on a tie the later
    day is tried first. Returns ``None`` when no free day is feasible.
    """
    right = bisect.bisect_left(free_days, target_day)
    left = right - 1
    count = len(free_days)
    while left >= 0 or right < count:
        if left < 0:
            take_right = True
        elif right >= count:
            take_right = False
        else:
            take_right = free_days[right] - target_day <= target_day - free_days[left]
        if take_right:
            idx = right
            right += 1
        else:
            idx = left
            left -= 1
        day = free_days[idx]
        if all(placed.site_id != site_id for placed in day_to_events[day]):
            return idx
    return None


def assign_events_to_days(
    events: list[Event],
    *,
    days: int | None = None,
    slots_per_day: int | None = None,
) -> dict[int, list[Event]]:
    """Place every event on a calendar day, as close to its target as possible.

    Events are first binned on their ``target_day``. A bin keeps at most
    ``slots_per_day`` events and at most one event per site; everything else
    overflows. Each overflow event is then moved to the free slot whose day is
    nearest to its target and does not already contain the same site.

    Args:
        events: Events to schedule. Only ``site_id`` and ``target_day`` are
            read; every ``target_day`` must lie in ``1..days`` (a value
            outside that range raises ``KeyError``).
        days: Number of calendar days. Defaults to the module-level ``DAYS``.
        slots_per_day: Capacity of each day. Defaults to the module-level
            ``SLOTS_PER_DAY``.

    Returns:
        Mapping from day number (1-based) to the events scheduled on it.

    Raises:
        RuntimeError: If an overflow event cannot be placed, or if the final
            schedule does not fill every day exactly or repeats a site within
            a day.
    """
    n_days = DAYS if days is None else days
    capacity = SLOTS_PER_DAY if slots_per_day is None else slots_per_day

    # Initial binning by target day; sorting makes the result deterministic.
    day_to_events: dict[int, list[Event]] = {d: [] for d in range(1, n_days + 1)}
    for ev in sorted(events, key=lambda e: (e.target_day, e.site_id)):
        day_to_events[ev.target_day].append(ev)

    # Enforce per-day capacity and per-day unique site_id.
    overflow: list[Event] = []
    for day in range(1, n_days + 1):
        bucket = day_to_events[day]
        if not bucket:
            continue
        seen: set[int] = set()
        kept: list[Event] = []
        for ev in bucket:
            if ev.site_id in seen:
                overflow.append(ev)
            else:
                seen.add(ev.site_id)
                kept.append(ev)
        # Still over capacity: keep the first ones (bucket is sorted by site_id).
        day_to_events[day] = kept[:capacity]
        overflow.extend(kept[capacity:])

    # One entry per free slot, so a day with two free slots appears twice.
    # Built in ascending day order, hence already sorted for bisect.
    free_days: list[int] = [
        day
        for day in range(1, n_days + 1)
        for _ in range(capacity - len(day_to_events[day]))
    ]

    for ev in sorted(overflow, key=lambda e: (e.target_day, e.site_id)):
        if not free_days:
            raise RuntimeError("No remaining capacity but overflow events remain.")
        idx = _nearest_feasible_slot(free_days, ev.target_day, ev.site_id, day_to_events)
        if idx is None:
            raise RuntimeError(f"Unable to place event for site_id={ev.site_id}; per-day uniqueness too strict.")
        day = free_days.pop(idx)
        day_to_events[day].append(ev)

    # Final sanity: every day filled, and no day has duplicate site_id.
    for day in range(1, n_days + 1):
        if len(day_to_events[day]) != capacity:
            raise RuntimeError(f"Day {day} not filled: {len(day_to_events[day])} events.")
        ids = [e.site_id for e in day_to_events[day]]
        if len(set(ids)) != len(ids):
            raise RuntimeError(f"Day {day} has duplicate site assignments.")
    return day_to_events
def main() -> None:
    """Build the yearly visit schedule and write it to ``OUTPUT_XLSX``.

    Steps:
        1. Read the ``allocations`` sheet of ``ALLOC_XLSX`` and take the
           column ``k_<DEFAULT_METHOD>_<DEFAULT_SCENARIO>`` as the number of
           visits per site.
        2. Check that the visit counts sum to ``DAYS * SLOTS_PER_DAY`` and
           that every site gets at least one visit.
        3. Generate evenly spaced target days per site and assign them to
           calendar days.
        4. Export four sheets: ``meta``, ``calendar`` (one row per day),
           ``site_dates`` (one row per visit) and ``gap_metrics`` (spacing
           between consecutive visits of each site).

    Raises:
        ValueError: If the allocation column is missing, the counts do not
            sum to the number of available slots, or a site has fewer than
            one visit.
        RuntimeError: If the number of generated events does not match the
            number of available slots.
    """
    alloc = pd.read_excel(ALLOC_XLSX, sheet_name="allocations")
    k_col = f"k_{DEFAULT_METHOD}_{DEFAULT_SCENARIO}"
    if k_col not in alloc.columns:
        raise ValueError(f"Allocation column not found: {k_col}")
    alloc = alloc[["site_id", "site_name", k_col]].copy()
    alloc = alloc.rename(columns={k_col: "k_2021"})
    alloc["k_2021"] = pd.to_numeric(alloc["k_2021"], errors="raise").astype(int)

    total_slots = DAYS * SLOTS_PER_DAY
    if int(alloc["k_2021"].sum()) != total_slots:
        raise ValueError("k_2021 does not match total required slots.")
    if (alloc["k_2021"] < 1).any():
        raise ValueError("k_2021 violates coverage constraint k_i >= 1.")

    events: list[Event] = []
    for row in alloc.itertuples(index=False):
        events.extend(build_targets(site_id=int(row.site_id), site_name=str(row.site_name), k=int(row.k_2021)))
    if len(events) != total_slots:
        raise RuntimeError("Generated events mismatch total required slots.")

    day_to_events = assign_events_to_days(events)

    start = dt.date(YEAR, 1, 1)
    calendar_rows: list[dict[str, object]] = []
    per_site_rows: list[dict[str, object]] = []
    for day in range(1, DAYS + 1):
        date = start + dt.timedelta(days=day - 1)
        evs = sorted(day_to_events[day], key=lambda e: e.site_id)
        calendar_row: dict[str, object] = {
            "date": date.isoformat(),
            "day_of_year": day,
        }
        for slot, ev in enumerate(evs, start=1):
            # One id/name column pair per slot, so the calendar sheet follows
            # SLOTS_PER_DAY instead of assuming exactly two visits per day.
            calendar_row[f"site{slot}_id"] = ev.site_id
            calendar_row[f"site{slot}_name"] = ev.site_name
            per_site_rows.append(
                {
                    "site_id": ev.site_id,
                    "site_name": ev.site_name,
                    "date": date.isoformat(),
                    "day_of_year": day,
                    "slot": slot,
                    "target_day": ev.target_day,
                }
            )
        calendar_rows.append(calendar_row)

    calendar_df = pd.DataFrame(calendar_rows)
    site_dates_df = pd.DataFrame(per_site_rows).sort_values(["site_id", "day_of_year"]).reset_index(drop=True)

    # Schedule quality metrics: gaps between consecutive visits of each site.
    gap_rows: list[dict[str, object]] = []
    for site_id, group in site_dates_df.groupby("site_id"):
        days = group["day_of_year"].to_numpy(int)
        gaps = np.diff(days)
        if len(gaps) == 0:
            # A single visit has no gap to measure.
            gap_rows.append({"site_id": int(site_id), "k": 1, "gap_max": None, "gap_mean": None, "gap_std": None})
        else:
            gap_rows.append(
                {
                    "site_id": int(site_id),
                    "k": int(len(days)),
                    "gap_max": int(gaps.max()),
                    "gap_mean": float(gaps.mean()),
                    "gap_std": float(gaps.std(ddof=0)),
                }
            )
    gap_df = pd.DataFrame(gap_rows).merge(alloc[["site_id", "site_name"]], on="site_id", how="left")

    meta_df = pd.DataFrame(
        [
            {"key": "year", "value": YEAR},
            {"key": "days", "value": DAYS},
            {"key": "slots_per_day", "value": SLOTS_PER_DAY},
            {"key": "total_visits", "value": int(total_slots)},
            {"key": "allocation_scenario", "value": DEFAULT_SCENARIO},
            {"key": "allocation_method", "value": DEFAULT_METHOD},
            {"key": "k_column", "value": k_col},
        ]
    )

    with pd.ExcelWriter(OUTPUT_XLSX, engine="openpyxl") as writer:
        meta_df.to_excel(writer, index=False, sheet_name="meta")
        calendar_df.to_excel(writer, index=False, sheet_name="calendar")
        site_dates_df.to_excel(writer, index=False, sheet_name="site_dates")
        gap_df.to_excel(writer, index=False, sheet_name="gap_metrics")
# Run the scheduler only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()