296 lines
10 KiB
Python
296 lines
10 KiB
Python
|
|
"""
|
||
|
|
Scheduling Optimization (CP-SAT)
|
||
|
|
|
||
|
|
Goal:
|
||
|
|
- Given required visit frequencies f_i for each site i, create a 365-day schedule
|
||
|
|
with at most 2 visits per day, and visits per site spaced as regularly as possible.
|
||
|
|
|
||
|
|
Defaults match the current repo setup:
|
||
|
|
- Days T = 365, daily capacity = 2, Gap_min = 14 days
|
||
|
|
- Frequencies are read from `data/kmin_effectiveness_data.csv` (columns visits_01..visits_N)
|
||
|
|
using the selection rule: first row where gini_eff < 0.2, otherwise best effectiveness.
|
||
|
|
|
||
|
|
Outputs (written to data/):
|
||
|
|
- schedule_optimized_kminX.X_gap14.csv (wide per-day table, 2 slots)
|
||
|
|
- schedule_long_kminX.X_gap14.csv (long table, one row per visit)
|
||
|
|
- site_visits_kminX.X_gap14.csv (per-site visit days and intervals)
|
||
|
|
"""
|
||
|
|
|
||
|
|
from __future__ import annotations
|
||
|
|
|
||
|
|
import argparse
|
||
|
|
import os
|
||
|
|
from dataclasses import dataclass
|
||
|
|
from typing import List, Optional, Tuple
|
||
|
|
|
||
|
|
import pandas as pd
|
||
|
|
|
||
|
|
|
||
|
|
OUTPUT_DIR = "data"
|
||
|
|
DEFAULT_RESULTS_CSV = os.path.join(OUTPUT_DIR, "kmin_effectiveness_data.csv")
|
||
|
|
DEFAULT_SITES_CSV = os.path.join(OUTPUT_DIR, "kmin_effectiveness_sites.csv")
|
||
|
|
|
||
|
|
|
||
|
|
@dataclass(frozen=True)
class SchedulingInputs:
    """Validated, immutable bundle of everything the scheduler needs.

    Instances are normally produced by `load_inputs`, which performs all
    validation before construction.
    """

    # Planning horizon length in days (e.g. 365).
    days: int
    # Maximum number of site visits allowed on any single day.
    daily_capacity: int
    # Minimum number of days between consecutive visits to the same site.
    gap_min: int
    # The k_min value whose row was selected from the results CSV.
    k_min: float
    # Site display names; position i corresponds to 1-based site_idx i+1
    # and to frequencies[i].
    site_names: List[str]
    # Required number of visits per site over the horizon (non-negative),
    # ordered like site_names.
    frequencies: List[int]
|
||
|
|
|
||
|
|
|
||
|
|
def _select_kmin_row(df: pd.DataFrame, k_min: Optional[float]) -> Tuple[float, pd.Series]:
|
||
|
|
if k_min is not None:
|
||
|
|
idx = (df["k_min"] - float(k_min)).abs().idxmin()
|
||
|
|
row = df.loc[idx]
|
||
|
|
return float(row["k_min"]), row
|
||
|
|
|
||
|
|
candidates = df.loc[df["gini_eff"] < 0.2]
|
||
|
|
if len(candidates) > 0:
|
||
|
|
row = candidates.iloc[0]
|
||
|
|
return float(row["k_min"]), row
|
||
|
|
|
||
|
|
idx = df["effectiveness"].idxmax()
|
||
|
|
row = df.loc[idx]
|
||
|
|
return float(row["k_min"]), row
|
||
|
|
|
||
|
|
|
||
|
|
def load_inputs(
    *,
    results_csv: str = DEFAULT_RESULTS_CSV,
    sites_csv: str = DEFAULT_SITES_CSV,
    days: int = 365,
    daily_capacity: int = 2,
    gap_min: int = 14,
    k_min: Optional[float] = None,
) -> SchedulingInputs:
    """Load and validate scheduling inputs from the repo's CSV outputs.

    Reads per-site visit frequencies from *results_csv* (selecting one row
    via `_select_kmin_row`) and site names from *sites_csv*, then validates
    that the requested schedule is not trivially infeasible.

    Raises:
        ValueError: on missing visits_* columns, negative frequencies,
            site/frequency count mismatch, invalid parameters, or a
            provably infeasible instance.
    """
    df_results = pd.read_csv(results_csv)
    selected_k, row = _select_kmin_row(df_results, k_min)

    visit_cols = [c for c in df_results.columns if c.startswith("visits_")]
    if not visit_cols:
        raise ValueError(f"No visits_* columns found in {results_csv}")

    freqs = [int(row[c]) for c in visit_cols]
    if any(f < 0 for f in freqs):
        raise ValueError("Frequencies must be non-negative")

    df_sites = pd.read_csv(sites_csv)
    # site_idx in files is 1-based, and matches visits_01.. ordering.
    df_sites = df_sites.sort_values("site_idx")
    site_names = df_sites["site_name"].astype(str).tolist()
    if len(site_names) != len(freqs):
        raise ValueError(
            f"Site count mismatch: {len(site_names)} sites in {sites_csv}, {len(freqs)} frequencies in {results_csv}"
        )

    if days <= 0:
        raise ValueError("days must be > 0")
    if daily_capacity <= 0:
        raise ValueError("daily_capacity must be > 0")
    if gap_min < 1:
        raise ValueError("gap_min must be >= 1")

    # Fail fast on a guaranteed-infeasible instance instead of letting the
    # CP-SAT solver burn its whole time limit discovering the same thing.
    total_visits = sum(freqs)
    if total_visits > days * daily_capacity:
        raise ValueError(
            f"Infeasible: {total_visits} total visits exceed capacity "
            f"{days * daily_capacity} ({days} days x {daily_capacity}/day)."
        )

    for i, f in enumerate(freqs, start=1):
        # A site with f visits spaced >= gap_min apart needs at least
        # gap_min * (f - 1) + 1 days.
        if f >= 2 and gap_min * (f - 1) > (days - 1):
            raise ValueError(
                f"Infeasible for site {i}: f_i={f} with gap_min={gap_min} does not fit in {days} days."
            )

    return SchedulingInputs(
        days=days,
        daily_capacity=daily_capacity,
        gap_min=gap_min,
        k_min=selected_k,
        site_names=site_names,
        frequencies=freqs,
    )
|
||
|
|
|
||
|
|
|
||
|
|
def solve_schedule_cp_sat(
    inputs: SchedulingInputs,
    *,
    time_limit_s: float = 60.0,
    num_workers: Optional[int] = None,
) -> List[Tuple[int, int]]:
    """
    Returns a list of (day, site_idx) visits, where day is 0-based and site_idx is 1-based.
    """
    try:
        from ortools.sat.python import cp_model
    except ModuleNotFoundError as e:
        raise RuntimeError(
            "Missing dependency: ortools. Install with `pip install ortools` and rerun."
        ) from e

    n_days = inputs.days
    model = cp_model.CpModel()

    # One unit-length interval per visit; start day is the decision variable.
    intervals = []
    starts_by_site: List[List[cp_model.IntVar]] = []
    for site, f_i in enumerate(inputs.frequencies, start=1):
        site_starts = []
        for k in range(f_i):
            s = model.NewIntVar(0, n_days - 1, f"s_{site}_{k}")
            e = model.NewIntVar(1, n_days, f"e_{site}_{k}")
            model.Add(e == s + 1)
            intervals.append(model.NewIntervalVar(s, 1, e, f"iv_{site}_{k}"))
            site_starts.append(s)
        # Minimum spacing between consecutive visits to the same site.
        for prev, nxt in zip(site_starts, site_starts[1:]):
            model.Add(nxt >= prev + inputs.gap_min)
        starts_by_site.append(site_starts)

    # At most `daily_capacity` trucks per day (each visit has demand 1).
    model.AddCumulative(intervals, [1] * len(intervals), inputs.daily_capacity)

    # Objective: minimize total absolute deviation of each realized gap from
    # the ideal gap days / f_i, scaled by 100 to stay in integer arithmetic.
    deviations = []
    for site, f_i in enumerate(inputs.frequencies, start=1):
        if f_i < 2:
            continue
        ideal_x100 = int(round((n_days * 100.0) / f_i))
        site_starts = starts_by_site[site - 1]
        for k in range(f_i - 1):
            gap = model.NewIntVar(inputs.gap_min, n_days - 1, f"gap_{site}_{k}")
            model.Add(gap == site_starts[k + 1] - site_starts[k])

            diff = model.NewIntVar(-(n_days * 100), n_days * 100, f"diff_{site}_{k}")
            model.Add(diff == gap * 100 - ideal_x100)

            dev = model.NewIntVar(0, n_days * 100, f"dev_{site}_{k}")
            model.AddAbsEquality(dev, diff)
            deviations.append(dev)

    model.Minimize(sum(deviations) if deviations else 0)

    solver = cp_model.CpSolver()
    solver.parameters.max_time_in_seconds = float(time_limit_s)
    if num_workers is not None:
        solver.parameters.num_search_workers = int(num_workers)

    status = solver.Solve(model)
    if status not in (cp_model.OPTIMAL, cp_model.FEASIBLE):
        raise RuntimeError(f"No feasible schedule found (status={status}). Try relaxing constraints.")

    return [
        (int(solver.Value(s)), site)
        for site, site_starts in enumerate(starts_by_site, start=1)
        for s in site_starts
    ]
|
||
|
|
|
||
|
|
|
||
|
|
def write_outputs(
    inputs: SchedulingInputs,
    visits: List[Tuple[int, int]],
) -> None:
    """Write the solved schedule to three CSVs under `data/`.

    - schedule_optimized_<stem>.csv: wide per-day table with one
      siteK_idx/siteK_name column pair per unit of daily capacity.
    - schedule_long_<stem>.csv: one row per visit, sorted by day.
    - site_visits_<stem>.csv: per-site visit days and inter-visit gaps.

    *visits* holds (day, site_idx) pairs, day 0-based and site_idx 1-based.
    """
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    k_tag = f"{inputs.k_min:.1f}"
    stem = f"kmin{k_tag}_gap{inputs.gap_min}"

    # Wide per-day schedule. Generalized from a hard-coded 2 slots to
    # inputs.daily_capacity slots; for capacity=2 the columns are unchanged.
    day_to_sites = {d: [] for d in range(inputs.days)}
    for day0, site_idx in visits:
        day_to_sites[day0].append(site_idx)
    for d in range(inputs.days):
        day_to_sites[d].sort()

    wide_rows = []
    for day0 in range(inputs.days):
        slots = day_to_sites[day0]
        row = {"day": day0 + 1}
        for slot in range(inputs.daily_capacity):
            s = slots[slot] if slot < len(slots) else ""
            row[f"site{slot + 1}_idx"] = s
            row[f"site{slot + 1}_name"] = inputs.site_names[s - 1] if s else ""
        wide_rows.append(row)
    pd.DataFrame(wide_rows).to_csv(os.path.join(OUTPUT_DIR, f"schedule_optimized_{stem}.csv"), index=False)

    # Long schedule (one row per visit), sorted by day then site.
    long_rows = []
    for day0, site_idx in sorted(visits):
        long_rows.append(
            {
                "day": day0 + 1,
                "site_idx": site_idx,
                "site_name": inputs.site_names[site_idx - 1],
            }
        )
    pd.DataFrame(long_rows).to_csv(os.path.join(OUTPUT_DIR, f"schedule_long_{stem}.csv"), index=False)

    # Per-site visit days and intervals (1-based days, gaps in days).
    site_rows = []
    per_site = {i: [] for i in range(1, len(inputs.site_names) + 1)}
    for day0, site_idx in visits:
        per_site[site_idx].append(day0 + 1)
    for site_idx in per_site:
        per_site[site_idx].sort()

    for site_idx, days_list in per_site.items():
        gaps = [b - a for a, b in zip(days_list, days_list[1:])]
        site_rows.append(
            {
                "site_idx": site_idx,
                "site_name": inputs.site_names[site_idx - 1],
                "f_i": inputs.frequencies[site_idx - 1],
                "visit_days": ";".join(map(str, days_list)),
                "gaps": ";".join(map(str, gaps)),
                # Empty string (not NaN/0) when a site has fewer than 2 visits.
                "min_gap": min(gaps) if gaps else "",
                "avg_gap": (sum(gaps) / len(gaps)) if gaps else "",
            }
        )
    pd.DataFrame(site_rows).to_csv(os.path.join(OUTPUT_DIR, f"site_visits_{stem}.csv"), index=False)
|
||
|
|
|
||
|
|
|
||
|
|
def main() -> None:
    """CLI entry point: parse arguments, load inputs, solve, write CSVs."""
    parser = argparse.ArgumentParser(description="Optimize 365-day MFP schedule with CP-SAT.")
    parser.add_argument("--results-csv", default=DEFAULT_RESULTS_CSV)
    parser.add_argument("--sites-csv", default=DEFAULT_SITES_CSV)
    parser.add_argument("--days", type=int, default=365)
    parser.add_argument("--daily-capacity", type=int, default=2)
    parser.add_argument("--gap-min", type=int, default=14)
    parser.add_argument("--kmin", type=float, default=None, help="Pick the nearest k_min row from results CSV.")
    parser.add_argument("--time-limit", type=float, default=60.0)
    parser.add_argument("--workers", type=int, default=None)
    ns = parser.parse_args()

    inputs = load_inputs(
        results_csv=ns.results_csv,
        sites_csv=ns.sites_csv,
        days=ns.days,
        daily_capacity=ns.daily_capacity,
        gap_min=ns.gap_min,
        k_min=ns.kmin,
    )

    # Quick sanity summary before the (possibly long) solve.
    print(
        f"Selected k_min={inputs.k_min:.1f}, sites={len(inputs.site_names)}, "
        f"total_visits={sum(inputs.frequencies)}, capacity={inputs.days * inputs.daily_capacity}, gap_min={inputs.gap_min}"
    )

    schedule = solve_schedule_cp_sat(inputs, time_limit_s=ns.time_limit, num_workers=ns.workers)
    write_outputs(inputs, schedule)
    print(f"Saved schedule CSVs to `{OUTPUT_DIR}/` (k_min={inputs.k_min:.1f}).")
|
||
|
|
|
||
|
|
|
||
|
|
# Script entry point: build inputs, solve the schedule, and write CSVs.
if __name__ == "__main__":
    main()
|
||
|
|
|