""" Scheduling Optimization (CP-SAT) Goal: - Given required visit frequencies f_i for each site i, create a 365-day schedule with at most 2 visits per day, and visits per site spaced as regularly as possible. Defaults match the current repo setup: - Days T = 365, daily capacity = 2, Gap_min = 14 days - Frequencies are read from `data/kmin_effectiveness_data.csv` (columns visits_01..visits_N) using the selection rule: first row where gini_eff < 0.2, otherwise best effectiveness. Outputs (written to data/): - schedule_optimized_kminX.X_gap14.csv (wide per-day table, 2 slots) - schedule_long_kminX.X_gap14.csv (long table, one row per visit) - site_visits_kminX.X_gap14.csv (per-site visit days and intervals) """ from __future__ import annotations import argparse import os from dataclasses import dataclass from typing import List, Optional, Tuple import pandas as pd OUTPUT_DIR = "data" DEFAULT_RESULTS_CSV = os.path.join(OUTPUT_DIR, "kmin_effectiveness_data.csv") DEFAULT_SITES_CSV = os.path.join(OUTPUT_DIR, "kmin_effectiveness_sites.csv") @dataclass(frozen=True) class SchedulingInputs: days: int daily_capacity: int gap_min: int k_min: float site_names: List[str] frequencies: List[int] def _select_kmin_row(df: pd.DataFrame, k_min: Optional[float]) -> Tuple[float, pd.Series]: if k_min is not None: idx = (df["k_min"] - float(k_min)).abs().idxmin() row = df.loc[idx] return float(row["k_min"]), row candidates = df.loc[df["gini_eff"] < 0.2] if len(candidates) > 0: row = candidates.iloc[0] return float(row["k_min"]), row idx = df["effectiveness"].idxmax() row = df.loc[idx] return float(row["k_min"]), row def load_inputs( *, results_csv: str = DEFAULT_RESULTS_CSV, sites_csv: str = DEFAULT_SITES_CSV, days: int = 365, daily_capacity: int = 2, gap_min: int = 14, k_min: Optional[float] = None, ) -> SchedulingInputs: df_results = pd.read_csv(results_csv) selected_k, row = _select_kmin_row(df_results, k_min) visit_cols = [c for c in df_results.columns if c.startswith("visits_")] if not visit_cols: raise ValueError(f"No visits_* columns found in {results_csv}") freqs = [int(row[c]) for c in visit_cols] if any(f < 0 for f in freqs): raise ValueError("Frequencies must be non-negative") df_sites = pd.read_csv(sites_csv) # site_idx in files is 1-based, and matches visits_01.. ordering. df_sites = df_sites.sort_values("site_idx") site_names = df_sites["site_name"].astype(str).tolist() if len(site_names) != len(freqs): raise ValueError( f"Site count mismatch: {len(site_names)} sites in {sites_csv}, {len(freqs)} frequencies in {results_csv}" ) if days <= 0: raise ValueError("days must be > 0") if daily_capacity <= 0: raise ValueError("daily_capacity must be > 0") if gap_min < 1: raise ValueError("gap_min must be >= 1") for i, f in enumerate(freqs, start=1): if f >= 2 and gap_min * (f - 1) > (days - 1): raise ValueError( f"Infeasible for site {i}: f_i={f} with gap_min={gap_min} does not fit in {days} days." ) return SchedulingInputs( days=days, daily_capacity=daily_capacity, gap_min=gap_min, k_min=selected_k, site_names=site_names, frequencies=freqs, ) def solve_schedule_cp_sat( inputs: SchedulingInputs, *, time_limit_s: float = 60.0, num_workers: Optional[int] = None, ) -> List[Tuple[int, int]]: """ Returns a list of (day, site_idx) visits, where day is 0-based and site_idx is 1-based. """ try: from ortools.sat.python import cp_model except ModuleNotFoundError as e: raise RuntimeError( "Missing dependency: ortools. Install with `pip install ortools` and rerun." ) from e days = inputs.days cap = inputs.daily_capacity gap_min = inputs.gap_min freqs = inputs.frequencies model = cp_model.CpModel() all_intervals = [] visit_starts: List[List[cp_model.IntVar]] = [] for site_idx_1based, f_i in enumerate(freqs, start=1): starts_i: List[cp_model.IntVar] = [] for k in range(f_i): start = model.NewIntVar(0, days - 1, f"s_{site_idx_1based}_{k}") end = model.NewIntVar(1, days, f"e_{site_idx_1based}_{k}") model.Add(end == start + 1) interval = model.NewIntervalVar(start, 1, end, f"iv_{site_idx_1based}_{k}") all_intervals.append(interval) starts_i.append(start) for k in range(f_i - 1): model.Add(starts_i[k + 1] >= starts_i[k] + gap_min) visit_starts.append(starts_i) # At most 2 trucks per day (each visit is a 1-day interval, demand=1). model.AddCumulative(all_intervals, [1] * len(all_intervals), cap) # Objective: minimize total absolute deviation from ideal gap (days / f_i). objective_terms = [] for site_idx_1based, f_i in enumerate(freqs, start=1): if f_i < 2: continue target_scaled = int(round((days * 100.0) / f_i)) starts_i = visit_starts[site_idx_1based - 1] for k in range(f_i - 1): gap = model.NewIntVar(gap_min, days - 1, f"gap_{site_idx_1based}_{k}") model.Add(gap == starts_i[k + 1] - starts_i[k]) diff = model.NewIntVar(-(days * 100), days * 100, f"diff_{site_idx_1based}_{k}") model.Add(diff == gap * 100 - target_scaled) dev = model.NewIntVar(0, days * 100, f"dev_{site_idx_1based}_{k}") model.AddAbsEquality(dev, diff) objective_terms.append(dev) model.Minimize(sum(objective_terms) if objective_terms else 0) solver = cp_model.CpSolver() solver.parameters.max_time_in_seconds = float(time_limit_s) if num_workers is not None: solver.parameters.num_search_workers = int(num_workers) status = solver.Solve(model) if status not in (cp_model.OPTIMAL, cp_model.FEASIBLE): raise RuntimeError(f"No feasible schedule found (status={status}). Try relaxing constraints.") visits: List[Tuple[int, int]] = [] for site_idx_1based, starts_i in enumerate(visit_starts, start=1): for start in starts_i: visits.append((int(solver.Value(start)), site_idx_1based)) return visits def write_outputs( inputs: SchedulingInputs, visits: List[Tuple[int, int]], ) -> None: os.makedirs(OUTPUT_DIR, exist_ok=True) k_tag = f"{inputs.k_min:.1f}" stem = f"kmin{k_tag}_gap{inputs.gap_min}" # Wide per-day schedule (two slots). day_to_sites = {d: [] for d in range(inputs.days)} for day0, site_idx in visits: day_to_sites[day0].append(site_idx) for d in range(inputs.days): day_to_sites[d].sort() wide_rows = [] for day0 in range(inputs.days): slots = day_to_sites[day0] s1 = slots[0] if len(slots) > 0 else "" s2 = slots[1] if len(slots) > 1 else "" n1 = inputs.site_names[s1 - 1] if s1 else "" n2 = inputs.site_names[s2 - 1] if s2 else "" wide_rows.append( { "day": day0 + 1, "site1_idx": s1, "site1_name": n1, "site2_idx": s2, "site2_name": n2, } ) pd.DataFrame(wide_rows).to_csv(os.path.join(OUTPUT_DIR, f"schedule_optimized_{stem}.csv"), index=False) # Long schedule (one row per visit). long_rows = [] for day0, site_idx in sorted(visits): long_rows.append( { "day": day0 + 1, "site_idx": site_idx, "site_name": inputs.site_names[site_idx - 1], } ) pd.DataFrame(long_rows).to_csv(os.path.join(OUTPUT_DIR, f"schedule_long_{stem}.csv"), index=False) # Per-site visit days and intervals. site_rows = [] per_site = {i: [] for i in range(1, len(inputs.site_names) + 1)} for day0, site_idx in visits: per_site[site_idx].append(day0 + 1) for site_idx in per_site: per_site[site_idx].sort() for site_idx, days_list in per_site.items(): gaps = [b - a for a, b in zip(days_list, days_list[1:])] site_rows.append( { "site_idx": site_idx, "site_name": inputs.site_names[site_idx - 1], "f_i": inputs.frequencies[site_idx - 1], "visit_days": ";".join(map(str, days_list)), "gaps": ";".join(map(str, gaps)), "min_gap": min(gaps) if gaps else "", "avg_gap": (sum(gaps) / len(gaps)) if gaps else "", } ) pd.DataFrame(site_rows).to_csv(os.path.join(OUTPUT_DIR, f"site_visits_{stem}.csv"), index=False) def main() -> None: parser = argparse.ArgumentParser(description="Optimize 365-day MFP schedule with CP-SAT.") parser.add_argument("--results-csv", default=DEFAULT_RESULTS_CSV) parser.add_argument("--sites-csv", default=DEFAULT_SITES_CSV) parser.add_argument("--days", type=int, default=365) parser.add_argument("--daily-capacity", type=int, default=2) parser.add_argument("--gap-min", type=int, default=14) parser.add_argument("--kmin", type=float, default=None, help="Pick the nearest k_min row from results CSV.") parser.add_argument("--time-limit", type=float, default=60.0) parser.add_argument("--workers", type=int, default=None) args = parser.parse_args() inputs = load_inputs( results_csv=args.results_csv, sites_csv=args.sites_csv, days=args.days, daily_capacity=args.daily_capacity, gap_min=args.gap_min, k_min=args.kmin, ) total_visits = sum(inputs.frequencies) print( f"Selected k_min={inputs.k_min:.1f}, sites={len(inputs.site_names)}, " f"total_visits={total_visits}, capacity={inputs.days * inputs.daily_capacity}, gap_min={inputs.gap_min}" ) visits = solve_schedule_cp_sat(inputs, time_limit_s=args.time_limit, num_workers=args.workers) write_outputs(inputs, visits) print(f"Saved schedule CSVs to `{OUTPUT_DIR}/` (k_min={inputs.k_min:.1f}).") if __name__ == "__main__": main()