"""Allocate the 2021 visit budget across sites under fairness/efficiency criteria.

Reads per-site demand proxies from INPUT_XLSX and writes allocations and
equity metrics to OUTPUT_XLSX.
"""
from __future__ import annotations
|
|
|
|
import math
|
|
from dataclasses import dataclass
|
|
|
|
import numpy as np
|
|
import pandas as pd
|
|
|
|
|
|
# Input/output workbook paths (relative to the working directory).
INPUT_XLSX = "task1/02_neighbor.xlsx"
OUTPUT_XLSX = "task1/03_allocate.xlsx"

N_TOTAL_2021 = 730  # scenario B: 2 sites/day * 365 days
K_MIN = 1  # minimum number of visits guaranteed to every site

# (scenario key, demand-proxy column name) pairs. Presumably each "rho"
# column is the demand proxy D with neighboring-community demand blended in
# at the given radius — confirm against the 02_neighbor workbook.
RHO_COLS = [
    ("self", "neighbor_demand_mu_self"),
    ("rho10", "neighbor_demand_mu_rho_10mi"),
    ("rho20", "neighbor_demand_mu_rho_20mi"),
    ("rho30", "neighbor_demand_mu_rho_30mi"),
]
|
|
|
|
|
|
def gini(x: np.ndarray) -> float:
    """Return the Gini coefficient of a nonnegative 1-D array.

    Uses the sorted-rank identity
        sum_ij |x_i - x_j| = 2 * sum_i (2i - n - 1) * x_(i)   (1-based rank i)
    which is mathematically equivalent to the mean-absolute-difference
    definition but runs in O(n log n) instead of building the O(n^2)
    pairwise-difference matrix.

    Args:
        x: Nonnegative values.

    Returns:
        Gini coefficient; 0.0 when all values are (close to) zero.

    Raises:
        ValueError: If any value is negative.
    """
    x = np.asarray(x, dtype=float)
    if np.any(x < 0):
        raise ValueError("Gini expects nonnegative values.")
    total = x.sum()
    if np.allclose(total, 0.0):
        return 0.0
    n = len(x)
    xs = np.sort(x)
    ranks = np.arange(1, n + 1)
    diff_sum = 2.0 * np.sum((2.0 * ranks - n - 1.0) * xs)
    # Same normalization as the pairwise form: diff_sum / (2 * n^2 * mean).
    return float(diff_sum / (2.0 * n * n * (total / n)))
|
|
|
|
|
|
def largest_remainder_allocation(weights: np.ndarray, total: int, k_min: int) -> np.ndarray:
    """Allocate `total` integer units proportionally to `weights`.

    Every entry receives at least `k_min`; the remainder is split with the
    largest-remainder (Hamilton) method so the result sums exactly to `total`.

    Args:
        weights: Nonnegative proportionality weights.
        total: Exact number of units to distribute.
        k_min: Minimum units per entry.

    Returns:
        Integer allocation summing to `total`, each entry >= `k_min`.

    Raises:
        ValueError: If `total < k_min * len(weights)` or any weight is negative.
        RuntimeError: If the final sanity checks fail (should not happen).
    """
    if total < k_min * len(weights):
        raise ValueError("total too small for k_min constraint.")
    w = np.asarray(weights, dtype=float)
    if np.any(w < 0):
        raise ValueError("weights must be nonnegative.")
    if np.allclose(w.sum(), 0.0):
        # All-zero weights: fall back to an equal split.
        base = np.full(len(w), total // len(w), dtype=int)
        base[: total - base.sum()] += 1
        # total >= k_min * len(w) guarantees total // len(w) >= k_min, so the
        # maximum below never changes the sum.
        return np.maximum(base, k_min)

    # Give everyone k_min first, then split the remainder proportionally.
    remaining = total - k_min * len(w)
    raw = remaining * (w / w.sum())
    flo = np.floor(raw).astype(int)
    k = flo + k_min

    need = total - k.sum()
    if need > 0:
        # Top up the entries with the largest fractional parts.
        frac = raw - flo
        order = np.argsort(-frac, kind="stable")
        k[order[:need]] += 1
    elif need < 0:
        # Defensive only: floor(raw).sum() <= remaining, so need >= 0 in
        # exact arithmetic; this guards against numerical surprises.
        # (The original also re-checked k.sum() here, duplicating the check
        # below — the duplicate has been removed.)
        frac = raw - flo
        order = np.argsort(frac, kind="stable")
        to_remove = -need
        for idx in order:
            if to_remove == 0:
                break
            if k[idx] > k_min:
                k[idx] -= 1
                to_remove -= 1

    if k.sum() != total:
        raise RuntimeError("Allocation did not match total.")
    if (k < k_min).any():
        raise RuntimeError("Allocation violated k_min.")
    return k
|
|
|
|
|
|
def feasibility_sum_for_t(t: float, d: np.ndarray, mu: np.ndarray, k_min: int) -> int:
|
|
# Constraints: k_i >= max(k_min, ceil(t * D_i / mu_i))
|
|
if t < 0:
|
|
return math.inf
|
|
req = np.ceil((t * d) / mu).astype(int)
|
|
req = np.maximum(req, k_min)
|
|
return int(req.sum())
|
|
|
|
|
|
def max_min_service_level(d: np.ndarray, mu: np.ndarray, n_total: int, k_min: int) -> tuple[float, np.ndarray]:
    """Maximize the minimum service level s_i = k_i * mu_i / d_i.

    Binary-searches the largest t for which the per-site requirements
    k_i = max(k_min, ceil(t * d_i / mu_i)) still fit within n_total, then
    returns that t together with the corresponding base allocation.

    Raises:
        ValueError: If any mu or d entry is non-positive.
        RuntimeError: If no feasible allocation can be built at t* even
            after backing off for floating-point slack.
    """
    if (mu <= 0).any():
        raise ValueError("mu must be positive to define service level.")
    if (d <= 0).any():
        raise ValueError("neighbor demand proxy D must be positive.")

    # Even if one site received every visit, its service level would be
    # n_total * mu_i / d_i, so the minimum over sites bounds t from above.
    high = float(np.min((n_total * mu) / d))
    low = 0.0

    # 60 halvings shrink the bracket far below float precision.
    for _ in range(60):
        mid = (low + high) / 2.0
        if feasibility_sum_for_t(mid, d=d, mu=mu, k_min=k_min) <= n_total:
            low = mid
        else:
            high = mid

    t_star = low
    k_base = np.maximum(np.ceil((t_star * d) / mu).astype(int), k_min)

    if k_base.sum() > n_total:
        # Numerical edge case: back off slightly to restore feasibility.
        t_star *= 0.999
        k_base = np.maximum(np.ceil((t_star * d) / mu).astype(int), k_min)
        if k_base.sum() > n_total:
            raise RuntimeError("Failed to construct feasible k at t*.")

    return t_star, k_base
|
|
|
|
|
|
def distribute_remaining_fair(k: np.ndarray, mu: np.ndarray, d: np.ndarray, remaining: int) -> np.ndarray:
    """Water-fill `remaining` extra visits onto the worst-served site.

    Each step recomputes s_i = k_i * mu_i / d_i and hands one visit to the
    site with the smallest s_i (ties broken by lowest index, per argmin).
    Returns a new array; `k` is not modified.
    """
    result = k.copy()
    step = 0
    while step < remaining:
        levels = (result * mu) / d
        worst = int(np.argmin(levels))
        result[worst] += 1
        step += 1
    return result
|
|
|
|
|
|
def distribute_remaining_efficient(k: np.ndarray, mu: np.ndarray, remaining: int) -> np.ndarray:
    """Distribute `remaining` extra visits round-robin in descending-mu order.

    NOTE(review): the original comment claimed this "maximizes sum k_i*mu_i",
    but a round-robin does not — true maximization would assign every extra
    visit to the single highest-mu site. This cycle instead favors high-mu
    sites first while still spreading the visits; the behavior is preserved
    as-is and only the misleading documentation is corrected.

    Args:
        k: Current integer allocation (not modified).
        mu: Expected clients per visit per site.
        remaining: Number of extra visits to hand out.

    Returns:
        A copy of `k` with the extra visits added.
    """
    k_out = k.copy()
    order = np.argsort(-mu, kind="stable")
    for t in range(remaining):
        k_out[order[t % len(order)]] += 1
    return k_out
|
|
|
|
|
|
@dataclass(frozen=True)
class AllocationResult:
    # One allocation produced by a single method under a single rho scenario.
    # NOTE(review): `k` is an ndarray, so the dataclass-generated __eq__
    # would return an array, not a bool — avoid comparing instances directly.
    method: str  # allocation method identifier, e.g. "fairness_waterfill"
    scenario: str  # rho scenario key from RHO_COLS, e.g. "rho10"
    k: np.ndarray  # integer visits allocated per site
    t_star: float | None  # max-min service level when applicable, else None
|
|
|
|
|
|
def compute_metrics(k: np.ndarray, mu: np.ndarray, d: np.ndarray) -> dict[str, float]:
    """Summarize an allocation: totals plus service-level equity statistics.

    Service level per site is s_i = k_i * mu_i / d_i. Returns plain floats
    so the dict serializes cleanly into a DataFrame/Excel row.
    """
    service = (k * mu) / d
    mean_s = service.mean()

    metrics: dict[str, float] = {}
    metrics["k_sum"] = float(k.sum())
    metrics["total_expected_clients"] = float(np.dot(k, mu))
    metrics["service_level_min"] = float(service.min())
    metrics["service_level_mean"] = float(mean_s)
    metrics["service_level_gini"] = float(gini(service))
    # Tiny epsilon keeps the coefficient of variation finite when mean is 0.
    metrics["service_level_cv"] = float(service.std(ddof=0) / (mean_s + 1e-12))
    metrics["k_gini"] = float(gini(k.astype(float)))
    return metrics
|
|
|
|
|
|
def improve_efficiency_under_gini_cap(
    *,
    k_start: np.ndarray,
    mu: np.ndarray,
    d: np.ndarray,
    n_total: int,
    k_min: int,
    gini_cap: float,
    max_iters: int = 20000,
) -> np.ndarray:
    """Locally repair an allocation until its service-level Gini meets a cap.

    Greedy local search: while gini(s) > gini_cap, move one visit from a
    well-served site ("donor") to a poorly-served one ("receiver"), choosing
    the move that cuts the Gini the most per unit of expected-client loss.
    The sum constraint (n_total) and per-site floor (k_min) are preserved by
    construction and re-verified at the end.

    Args:
        k_start: Feasible starting allocation (sums to n_total, all >= k_min).
        mu: Expected clients per visit per site.
        d: Demand proxy per site.
        n_total: Required allocation total.
        k_min: Per-site minimum.
        gini_cap: Target upper bound for gini(k * mu / d).
        max_iters: Safety limit on single-visit moves.

    Returns:
        An allocation with gini <= gini_cap, or the best found when no
        improving local move remains / max_iters is exhausted.

    Raises:
        ValueError: If k_start violates the sum or k_min constraints.
        RuntimeError: If the result violates constraints (defensive check).
    """
    if k_start.sum() != n_total:
        raise ValueError("k_start must sum to n_total.")
    if (k_start < k_min).any():
        raise ValueError("k_start violates k_min.")

    k = k_start.copy()
    s = (k * mu) / d
    g = gini(s)
    if g <= gini_cap:
        # Already fair enough: nothing to do.
        return k

    for _ in range(max_iters):
        s = (k * mu) / d
        g = gini(s)
        if g <= gini_cap:
            break

        # Only consider the 10 best- and 10 worst-served sites as move
        # endpoints; stable sort keeps tie-breaking deterministic.
        donor_candidates = np.argsort(-s, kind="stable")[:10]
        receiver_candidates = np.argsort(s, kind="stable")[:10]

        best_move = None
        best_score = None

        for donor in donor_candidates:
            if k[donor] <= k_min:
                # Donor is at the floor; cannot give up a visit.
                continue
            for receiver in receiver_candidates:
                if donor == receiver:
                    continue

                # Trial move: shift one visit donor -> receiver.
                k2 = k.copy()
                k2[donor] -= 1
                k2[receiver] += 1
                s2 = (k2 * mu) / d
                g2 = gini(s2)
                if g2 >= g:
                    # Only fairness-improving moves are admissible.
                    continue

                # Expected clients lost by the move (can be negative).
                eff_loss = mu[donor] - mu[receiver]
                if eff_loss < 0:
                    # This move increases effectiveness and improves fairness; prefer strongly.
                    score = (g - g2) * 1e9 + (-eff_loss)
                else:
                    # Fairness gained per unit of efficiency sacrificed.
                    score = (g - g2) / (eff_loss + 1e-9)

                if best_score is None or score > best_score:
                    best_score = score
                    best_move = (donor, receiver, k2)

        if best_move is None:
            # No improving local swap found; stop.
            break
        k = best_move[2]

    if k.sum() != n_total or (k < k_min).any():
        raise RuntimeError("Post-optimization allocation violated constraints.")
    return k
|
|
|
|
|
|
def main() -> None:
    """Run every allocation method for every rho scenario and write results.

    Reads the "sites" sheet from INPUT_XLSX, computes seven allocations per
    scenario (all summing to N_TOTAL_2021 with at least K_MIN visits per
    site), and writes per-site allocations plus summary metrics to
    OUTPUT_XLSX (sheets "allocations" and "metrics").
    """
    sites = pd.read_excel(INPUT_XLSX, sheet_name="sites")
    mu = sites["mu_clients_per_visit"].to_numpy(float)
    visits_2019 = sites["visits_2019"].to_numpy(int)

    results: list[AllocationResult] = []
    metrics_rows: list[dict[str, object]] = []

    for scenario, dcol in RHO_COLS:
        d = sites[dcol].to_numpy(float)

        # Baseline: scaled 2019 to sum to N_TOTAL_2021
        # NOTE(review): identical across scenarios (depends only on
        # visits_2019) but recomputed per scenario so each scenario gets its
        # own labeled baseline columns and metrics row.
        k_2019_scaled = largest_remainder_allocation(
            weights=visits_2019.astype(float), total=N_TOTAL_2021, k_min=K_MIN
        )
        results.append(AllocationResult(method="baseline_2019_scaled", scenario=scenario, k=k_2019_scaled, t_star=None))

        # Max-min baseline k achieving best possible min service level under sum constraint.
        t_star, k_base = max_min_service_level(d=d, mu=mu, n_total=N_TOTAL_2021, k_min=K_MIN)
        remaining = N_TOTAL_2021 - int(k_base.sum())

        # Spend leftover visits on the currently worst-served sites.
        k_fair = distribute_remaining_fair(k_base, mu=mu, d=d, remaining=remaining)
        results.append(AllocationResult(method="fairness_waterfill", scenario=scenario, k=k_fair, t_star=t_star))

        # Spend leftover visits round-robin, high-mu sites first.
        k_eff = distribute_remaining_efficient(k_base, mu=mu, remaining=remaining)
        results.append(AllocationResult(method="efficiency_post_fair", scenario=scenario, k=k_eff, t_star=t_star))

        # Proportional to surrounding demand proxy D (uses "surrounding communities demand" directly)
        k_D = largest_remainder_allocation(weights=d, total=N_TOTAL_2021, k_min=K_MIN)
        results.append(AllocationResult(method="proportional_D", scenario=scenario, k=k_D, t_star=None))

        # Simple proportional fairness: k proportional to D/mu gives near-constant s in continuous relaxation.
        k_prop = largest_remainder_allocation(weights=d / mu, total=N_TOTAL_2021, k_min=K_MIN)
        results.append(AllocationResult(method="proportional_D_over_mu", scenario=scenario, k=k_prop, t_star=None))

        # Pure efficiency: k proportional to mu (with k_min)
        k_mu = largest_remainder_allocation(weights=mu, total=N_TOTAL_2021, k_min=K_MIN)
        results.append(AllocationResult(method="proportional_mu", scenario=scenario, k=k_mu, t_star=None))

        # Efficiency-maximizing subject to "no worse fairness than baseline_2019_scaled" (auditable cap)
        gini_cap = compute_metrics(k_2019_scaled, mu=mu, d=d)["service_level_gini"]
        k_mu_capped = improve_efficiency_under_gini_cap(
            k_start=k_mu, mu=mu, d=d, n_total=N_TOTAL_2021, k_min=K_MIN, gini_cap=float(gini_cap)
        )
        results.append(AllocationResult(method="proportional_mu_gini_capped_to_2019", scenario=scenario, k=k_mu_capped, t_star=None))

    # Assemble per-site output
    out_sites = sites[["site_id", "site_name", "lat", "lon", "visits_2019", "mu_clients_per_visit", "sd_clients_per_visit"]].copy()

    for res in results:
        # One k_* and one s_* column per (method, scenario) pair.
        col_k = f"k_{res.method}_{res.scenario}"
        out_sites[col_k] = res.k

        # Re-derive this result's demand proxy from its scenario key.
        d = sites[[c for s, c in RHO_COLS if s == res.scenario][0]].to_numpy(float)
        s_vals = (res.k * mu) / d
        out_sites[f"s_{res.method}_{res.scenario}"] = s_vals

        m = compute_metrics(res.k, mu=mu, d=d)
        row = {"scenario": res.scenario, "method": res.method, "t_star": res.t_star}
        row.update(m)
        metrics_rows.append(row)

    metrics_df = pd.DataFrame(metrics_rows).sort_values(["scenario", "method"]).reset_index(drop=True)

    with pd.ExcelWriter(OUTPUT_XLSX, engine="openpyxl") as writer:
        out_sites.to_excel(writer, index=False, sheet_name="allocations")
        metrics_df.to_excel(writer, index=False, sheet_name="metrics")
|
|
|
|
|
|
# Script entry point: run the full allocation pipeline.
if __name__ == "__main__":
    main()
|