from __future__ import annotations import math from dataclasses import dataclass import numpy as np import pandas as pd INPUT_XLSX = "task1/02_neighbor.xlsx" OUTPUT_XLSX = "task1/03_allocate.xlsx" N_TOTAL_2021 = 730 # scenario B: 2 sites/day * 365 days K_MIN = 1 RHO_COLS = [ ("self", "neighbor_demand_mu_self"), ("rho10", "neighbor_demand_mu_rho_10mi"), ("rho20", "neighbor_demand_mu_rho_20mi"), ("rho30", "neighbor_demand_mu_rho_30mi"), ] def gini(x: np.ndarray) -> float: x = np.asarray(x, dtype=float) if np.any(x < 0): raise ValueError("Gini expects nonnegative values.") if np.allclose(x.sum(), 0.0): return 0.0 n = len(x) diff_sum = np.abs(x.reshape(-1, 1) - x.reshape(1, -1)).sum() return diff_sum / (2.0 * n * n * x.mean()) def largest_remainder_allocation(weights: np.ndarray, total: int, k_min: int) -> np.ndarray: if total < k_min * len(weights): raise ValueError("total too small for k_min constraint.") w = np.asarray(weights, dtype=float) if np.any(w < 0): raise ValueError("weights must be nonnegative.") if np.allclose(w.sum(), 0.0): # Purely equal split base = np.full(len(w), total // len(w), dtype=int) base[: total - base.sum()] += 1 return np.maximum(base, k_min) remaining = total - k_min * len(w) w_norm = w / w.sum() raw = remaining * w_norm flo = np.floor(raw).astype(int) k = flo + k_min need = total - k.sum() if need > 0: frac = raw - flo order = np.argsort(-frac, kind="stable") k[order[:need]] += 1 elif need < 0: frac = raw - flo order = np.argsort(frac, kind="stable") to_remove = -need for idx in order: if to_remove == 0: break if k[idx] > k_min: k[idx] -= 1 to_remove -= 1 if k.sum() != total: raise RuntimeError("Failed to adjust allocation to exact total.") if k.sum() != total: raise RuntimeError("Allocation did not match total.") if (k < k_min).any(): raise RuntimeError("Allocation violated k_min.") return k def feasibility_sum_for_t(t: float, d: np.ndarray, mu: np.ndarray, k_min: int) -> int: # Constraints: k_i >= max(k_min, ceil(t * D_i / mu_i)) if t < 0: return math.inf req = np.ceil((t * d) / mu).astype(int) req = np.maximum(req, k_min) return int(req.sum()) def max_min_service_level(d: np.ndarray, mu: np.ndarray, n_total: int, k_min: int) -> tuple[float, np.ndarray]: if (mu <= 0).any(): raise ValueError("mu must be positive to define service level.") if (d <= 0).any(): raise ValueError("neighbor demand proxy D must be positive.") # Upper bound for t: if all visits assigned to best site, service level there is (n_total*mu_i/d_i). # For max-min, t cannot exceed min_i (n_total*mu_i/d_i) but k_min can also bind; use conservative. t_hi = float(np.min((n_total * mu) / d)) t_lo = 0.0 # Binary search for max feasible t for _ in range(60): t_mid = (t_lo + t_hi) / 2.0 s = feasibility_sum_for_t(t_mid, d=d, mu=mu, k_min=k_min) if s <= n_total: t_lo = t_mid else: t_hi = t_mid t_star = t_lo k_base = np.ceil((t_star * d) / mu).astype(int) k_base = np.maximum(k_base, k_min) if k_base.sum() > n_total: # Numerical edge case: back off to ensure feasibility t_star *= 0.999 k_base = np.ceil((t_star * d) / mu).astype(int) k_base = np.maximum(k_base, k_min) if k_base.sum() > n_total: raise RuntimeError("Failed to construct feasible k at t*.") return t_star, k_base def distribute_remaining_fair(k: np.ndarray, mu: np.ndarray, d: np.ndarray, remaining: int) -> np.ndarray: # Greedy: repeatedly allocate to smallest current s_i = k_i*mu_i/d_i k_out = k.copy() for _ in range(remaining): s = (k_out * mu) / d idx = int(np.argmin(s)) k_out[idx] += 1 return k_out def distribute_remaining_efficient(k: np.ndarray, mu: np.ndarray, remaining: int) -> np.ndarray: # Greedy: allocate to highest mu_i (maximizes sum k_i*mu_i) k_out = k.copy() order = np.argsort(-mu, kind="stable") for t in range(remaining): k_out[order[t % len(order)]] += 1 return k_out @dataclass(frozen=True) class AllocationResult: method: str scenario: str k: np.ndarray t_star: float | None def compute_metrics(k: np.ndarray, mu: np.ndarray, d: np.ndarray) -> dict[str, float]: s = (k * mu) / d return { "k_sum": float(k.sum()), "total_expected_clients": float(np.dot(k, mu)), "service_level_min": float(s.min()), "service_level_mean": float(s.mean()), "service_level_gini": float(gini(s)), "service_level_cv": float(s.std(ddof=0) / (s.mean() + 1e-12)), "k_gini": float(gini(k.astype(float))), } def improve_efficiency_under_gini_cap( *, k_start: np.ndarray, mu: np.ndarray, d: np.ndarray, n_total: int, k_min: int, gini_cap: float, max_iters: int = 20000, ) -> np.ndarray: if k_start.sum() != n_total: raise ValueError("k_start must sum to n_total.") if (k_start < k_min).any(): raise ValueError("k_start violates k_min.") k = k_start.copy() s = (k * mu) / d g = gini(s) if g <= gini_cap: return k for _ in range(max_iters): s = (k * mu) / d g = gini(s) if g <= gini_cap: break donor_candidates = np.argsort(-s, kind="stable")[:10] receiver_candidates = np.argsort(s, kind="stable")[:10] best_move = None best_score = None for donor in donor_candidates: if k[donor] <= k_min: continue for receiver in receiver_candidates: if donor == receiver: continue k2 = k.copy() k2[donor] -= 1 k2[receiver] += 1 s2 = (k2 * mu) / d g2 = gini(s2) if g2 >= g: continue eff_loss = mu[donor] - mu[receiver] if eff_loss < 0: # This move increases effectiveness and improves fairness; prefer strongly. score = (g - g2) * 1e9 + (-eff_loss) else: score = (g - g2) / (eff_loss + 1e-9) if best_score is None or score > best_score: best_score = score best_move = (donor, receiver, k2) if best_move is None: # No improving local swap found; stop. break k = best_move[2] if k.sum() != n_total or (k < k_min).any(): raise RuntimeError("Post-optimization allocation violated constraints.") return k def main() -> None: sites = pd.read_excel(INPUT_XLSX, sheet_name="sites") mu = sites["mu_clients_per_visit"].to_numpy(float) visits_2019 = sites["visits_2019"].to_numpy(int) results: list[AllocationResult] = [] metrics_rows: list[dict[str, object]] = [] for scenario, dcol in RHO_COLS: d = sites[dcol].to_numpy(float) # Baseline: scaled 2019 to sum to N_TOTAL_2021 k_2019_scaled = largest_remainder_allocation( weights=visits_2019.astype(float), total=N_TOTAL_2021, k_min=K_MIN ) results.append(AllocationResult(method="baseline_2019_scaled", scenario=scenario, k=k_2019_scaled, t_star=None)) # Max-min baseline k achieving best possible min service level under sum constraint. t_star, k_base = max_min_service_level(d=d, mu=mu, n_total=N_TOTAL_2021, k_min=K_MIN) remaining = N_TOTAL_2021 - int(k_base.sum()) k_fair = distribute_remaining_fair(k_base, mu=mu, d=d, remaining=remaining) results.append(AllocationResult(method="fairness_waterfill", scenario=scenario, k=k_fair, t_star=t_star)) k_eff = distribute_remaining_efficient(k_base, mu=mu, remaining=remaining) results.append(AllocationResult(method="efficiency_post_fair", scenario=scenario, k=k_eff, t_star=t_star)) # Proportional to surrounding demand proxy D (uses "surrounding communities demand" directly) k_D = largest_remainder_allocation(weights=d, total=N_TOTAL_2021, k_min=K_MIN) results.append(AllocationResult(method="proportional_D", scenario=scenario, k=k_D, t_star=None)) # Simple proportional fairness: k proportional to D/mu gives near-constant s in continuous relaxation. k_prop = largest_remainder_allocation(weights=d / mu, total=N_TOTAL_2021, k_min=K_MIN) results.append(AllocationResult(method="proportional_D_over_mu", scenario=scenario, k=k_prop, t_star=None)) # Pure efficiency: k proportional to mu (with k_min) k_mu = largest_remainder_allocation(weights=mu, total=N_TOTAL_2021, k_min=K_MIN) results.append(AllocationResult(method="proportional_mu", scenario=scenario, k=k_mu, t_star=None)) # Efficiency-maximizing subject to "no worse fairness than baseline_2019_scaled" (auditable cap) gini_cap = compute_metrics(k_2019_scaled, mu=mu, d=d)["service_level_gini"] k_mu_capped = improve_efficiency_under_gini_cap( k_start=k_mu, mu=mu, d=d, n_total=N_TOTAL_2021, k_min=K_MIN, gini_cap=float(gini_cap) ) results.append(AllocationResult(method="proportional_mu_gini_capped_to_2019", scenario=scenario, k=k_mu_capped, t_star=None)) # Assemble per-site output out_sites = sites[["site_id", "site_name", "lat", "lon", "visits_2019", "mu_clients_per_visit", "sd_clients_per_visit"]].copy() for res in results: col_k = f"k_{res.method}_{res.scenario}" out_sites[col_k] = res.k d = sites[[c for s, c in RHO_COLS if s == res.scenario][0]].to_numpy(float) s_vals = (res.k * mu) / d out_sites[f"s_{res.method}_{res.scenario}"] = s_vals m = compute_metrics(res.k, mu=mu, d=d) row = {"scenario": res.scenario, "method": res.method, "t_star": res.t_star} row.update(m) metrics_rows.append(row) metrics_df = pd.DataFrame(metrics_rows).sort_values(["scenario", "method"]).reset_index(drop=True) with pd.ExcelWriter(OUTPUT_XLSX, engine="openpyxl") as writer: out_sites.to_excel(writer, index=False, sheet_name="allocations") metrics_df.to_excel(writer, index=False, sheet_name="metrics") if __name__ == "__main__": main()