This commit is contained in:
2026-01-19 01:40:19 +08:00
commit 44b5b31825
9 changed files with 745 additions and 0 deletions

301
task1/03_allocate_k.py Normal file
View File

@@ -0,0 +1,301 @@
from __future__ import annotations
import math
from dataclasses import dataclass
import numpy as np
import pandas as pd
# Input workbook (presumably produced by the 02_neighbor step — verify pipeline order).
INPUT_XLSX = "task1/02_neighbor.xlsx"
# Output workbook written by this script (sheets: "allocations", "metrics").
OUTPUT_XLSX = "task1/03_allocate.xlsx"
N_TOTAL_2021 = 730 # scenario B: 2 sites/day * 365 days
# Minimum number of visits every site must receive.
K_MIN = 1
# (scenario key, input column holding that scenario's neighbor-demand proxy D_i).
RHO_COLS = [
    ("self", "neighbor_demand_mu_self"),
    ("rho10", "neighbor_demand_mu_rho_10mi"),
    ("rho20", "neighbor_demand_mu_rho_20mi"),
    ("rho30", "neighbor_demand_mu_rho_30mi"),
]
def gini(x: np.ndarray) -> float:
    """Gini coefficient of a nonnegative vector via the mean-absolute-difference formula.

    Returns 0.0 for an all-zero vector; raises ValueError on negative entries.
    O(n^2) in memory/time (full pairwise difference matrix).
    """
    values = np.asarray(x, dtype=float)
    if (values < 0).any():
        raise ValueError("Gini expects nonnegative values.")
    total = values.sum()
    if np.allclose(total, 0.0):
        return 0.0
    n = values.size
    # Sum of |v_i - v_j| over all ordered pairs.
    pairwise = np.abs(values[:, None] - values[None, :]).sum()
    return pairwise / (2.0 * n * n * values.mean())
def largest_remainder_allocation(weights: np.ndarray, total: int, k_min: int) -> np.ndarray:
    """Allocate `total` integer units proportionally to `weights`, flooring at `k_min` each.

    Uses the largest-remainder (Hamilton) method on the units left after the
    k_min floor is granted to every entry.

    Args:
        weights: nonnegative proportionality weights, one per entry.
        total: exact number of units to distribute.
        k_min: per-entry minimum.

    Returns:
        Integer array summing exactly to `total` with every entry >= k_min.

    Raises:
        ValueError: if total < k_min * len(weights) or any weight is negative.
        RuntimeError: if the rounding adjustment cannot reach the exact total.
    """
    w = np.asarray(weights, dtype=float)
    if total < k_min * len(w):
        raise ValueError("total too small for k_min constraint.")
    if np.any(w < 0):
        raise ValueError("weights must be nonnegative.")
    if np.allclose(w.sum(), 0.0):
        # Degenerate case (no signal in the weights): purely equal split.
        base = np.full(len(w), total // len(w), dtype=int)
        base[: total - base.sum()] += 1
        # total >= k_min * len(w) already guarantees base >= k_min; maximum is a safety net.
        return np.maximum(base, k_min)
    remaining = total - k_min * len(w)
    raw = remaining * (w / w.sum())
    flo = np.floor(raw).astype(int)
    k = flo + k_min
    need = total - k.sum()
    frac = raw - flo
    if need > 0:
        # Hand leftover units to the largest fractional remainders (stable tie-break).
        order = np.argsort(-frac, kind="stable")
        k[order[:need]] += 1
    elif need < 0:
        # Over-allocation can only arise from numerical noise: trim from the
        # smallest remainders while respecting the k_min floor.
        order = np.argsort(frac, kind="stable")
        to_remove = -need
        for idx in order:
            if to_remove == 0:
                break
            if k[idx] > k_min:
                k[idx] -= 1
                to_remove -= 1
    # Single consolidated validation (the original duplicated the total check).
    if k.sum() != total:
        raise RuntimeError("Allocation did not match total.")
    if (k < k_min).any():
        raise RuntimeError("Allocation violated k_min.")
    return k
def feasibility_sum_for_t(t: float, d: np.ndarray, mu: np.ndarray, k_min: int) -> int:
# Constraints: k_i >= max(k_min, ceil(t * D_i / mu_i))
if t < 0:
return math.inf
req = np.ceil((t * d) / mu).astype(int)
req = np.maximum(req, k_min)
return int(req.sum())
def max_min_service_level(d: np.ndarray, mu: np.ndarray, n_total: int, k_min: int) -> tuple[float, np.ndarray]:
    """Binary-search the largest achievable minimum service level t* and a base allocation.

    Service level of site i is s_i = k_i * mu_i / D_i. Returns (t*, k_base) where
    k_base is the cheapest allocation achieving s_i >= t* at every site, with
    k_base.sum() <= n_total and k_base >= k_min elementwise.
    """
    if (mu <= 0).any():
        raise ValueError("mu must be positive to define service level.")
    if (d <= 0).any():
        raise ValueError("neighbor demand proxy D must be positive.")

    def visits_needed(level: float) -> int:
        # Inlined feasibility check: k_i >= max(k_min, ceil(level * D_i / mu_i)).
        required = np.maximum(np.ceil((level * d) / mu).astype(int), k_min)
        return int(required.sum())

    # Upper bound: even giving all visits to the worst site cannot push its
    # level past n_total * mu_i / D_i, so t* <= min over sites.
    hi = float(np.min((n_total * mu) / d))
    lo = 0.0
    # 60 bisection steps — far below double precision on any realistic range.
    for _ in range(60):
        mid = (lo + hi) / 2.0
        if visits_needed(mid) <= n_total:
            lo = mid
        else:
            hi = mid
    t_star = lo
    k_base = np.maximum(np.ceil((t_star * d) / mu).astype(int), k_min)
    if k_base.sum() > n_total:
        # Numerical edge case: back off slightly to restore feasibility.
        t_star *= 0.999
        k_base = np.maximum(np.ceil((t_star * d) / mu).astype(int), k_min)
        if k_base.sum() > n_total:
            raise RuntimeError("Failed to construct feasible k at t*.")
    return t_star, k_base
def distribute_remaining_fair(k: np.ndarray, mu: np.ndarray, d: np.ndarray, remaining: int) -> np.ndarray:
    """Water-fill `remaining` extra visits one at a time onto the worst-served site.

    At each step the site with the smallest current s_i = k_i * mu_i / D_i gets
    one more visit (ties broken by lowest index via argmin). Input is not mutated.
    """
    result = k.copy()
    for _ in range(remaining):
        levels = (result * mu) / d
        worst = int(np.argmin(levels))
        result[worst] += 1
    return result
def distribute_remaining_efficient(k: np.ndarray, mu: np.ndarray, remaining: int) -> np.ndarray:
    """Spread `remaining` extra visits round-robin over sites in descending-mu order.

    NOTE(review): this is a round-robin starting from the highest-mu sites, not a
    strict give-everything-to-argmax(mu) rule; when remaining < n_sites the extras
    land only on the highest-mu sites. Input is not mutated.
    """
    result = k.copy()
    by_mu_desc = np.argsort(-mu, kind="stable")
    n = len(by_mu_desc)
    for step in range(remaining):
        result[by_mu_desc[step % n]] += 1
    return result
@dataclass(frozen=True)
class AllocationResult:
    """One allocation outcome: per-site visit counts for a (method, scenario) pair.

    NOTE(review): `k` is an np.ndarray, so the dataclass-generated __eq__/__hash__
    do not behave like value comparison (ndarray __eq__ is elementwise and ndarrays
    are unhashable). This file only stores and reads fields, never compares/hashes
    instances — confirm before relying on equality elsewhere.
    """
    # Allocation strategy label, e.g. "fairness_waterfill".
    method: str
    # Scenario key from RHO_COLS, e.g. "rho10".
    scenario: str
    # Integer visit counts per site.
    k: np.ndarray
    # Max-min service level t* when produced by the binary search; None otherwise.
    t_star: float | None
def compute_metrics(k: np.ndarray, mu: np.ndarray, d: np.ndarray) -> dict[str, float]:
    """Summary statistics for an allocation: totals, service levels, inequality.

    Service level per site is s_i = k_i * mu_i / D_i; Gini and CV measure how
    unevenly that level is spread. The 1e-12 guard avoids division by zero in CV.
    """
    levels = (k * mu) / d
    mean_level = levels.mean()
    return {
        "k_sum": float(k.sum()),
        "total_expected_clients": float(np.dot(k, mu)),
        "service_level_min": float(levels.min()),
        "service_level_mean": float(mean_level),
        "service_level_gini": float(gini(levels)),
        "service_level_cv": float(levels.std(ddof=0) / (mean_level + 1e-12)),
        "k_gini": float(gini(k.astype(float))),
    }
def improve_efficiency_under_gini_cap(
    *,
    k_start: np.ndarray,
    mu: np.ndarray,
    d: np.ndarray,
    n_total: int,
    k_min: int,
    gini_cap: float,
    max_iters: int = 20000,
) -> np.ndarray:
    """Greedy local search: rebalance single visits until service-level Gini <= gini_cap.

    Starting from `k_start` (must sum to n_total and respect k_min), repeatedly
    moves one visit from a well-served site (high s_i = k_i*mu_i/d_i) to a poorly
    served one, picking the candidate move that buys the most Gini reduction per
    unit of effectiveness (sum k_i*mu_i) sacrificed. May return with Gini still
    above the cap if no improving local move exists or max_iters is exhausted.

    Raises:
        ValueError: if k_start violates the total or the k_min floor.
        RuntimeError: if the final allocation violates the constraints.
    """
    if k_start.sum() != n_total:
        raise ValueError("k_start must sum to n_total.")
    if (k_start < k_min).any():
        raise ValueError("k_start violates k_min.")
    k = k_start.copy()
    s = (k * mu) / d
    g = gini(s)
    if g <= gini_cap:
        # Already within the fairness cap; nothing to do.
        return k
    for _ in range(max_iters):
        s = (k * mu) / d
        g = gini(s)
        if g <= gini_cap:
            break
        # Restrict to the 10 best-served donors and 10 worst-served receivers
        # to keep each iteration's candidate scan cheap.
        donor_candidates = np.argsort(-s, kind="stable")[:10]
        receiver_candidates = np.argsort(s, kind="stable")[:10]
        best_move = None
        best_score = None
        for donor in donor_candidates:
            if k[donor] <= k_min:
                # Donor cannot drop below the floor.
                continue
            for receiver in receiver_candidates:
                if donor == receiver:
                    continue
                # Tentatively move one visit donor -> receiver and re-score.
                k2 = k.copy()
                k2[donor] -= 1
                k2[receiver] += 1
                s2 = (k2 * mu) / d
                g2 = gini(s2)
                if g2 >= g:
                    # Keep only strictly fairness-improving moves.
                    continue
                eff_loss = mu[donor] - mu[receiver]
                if eff_loss < 0:
                    # This move increases effectiveness and improves fairness; prefer strongly.
                    score = (g - g2) * 1e9 + (-eff_loss)
                else:
                    # Fairness gained per unit of effectiveness sacrificed.
                    score = (g - g2) / (eff_loss + 1e-9)
                if best_score is None or score > best_score:
                    best_score = score
                    best_move = (donor, receiver, k2)
        if best_move is None:
            # No improving local swap found; stop.
            break
        k = best_move[2]
    # Final sanity check: moves are sum-preserving and floor-respecting by
    # construction, so a failure here indicates a logic error.
    if k.sum() != n_total or (k < k_min).any():
        raise RuntimeError("Post-optimization allocation violated constraints.")
    return k
def main() -> None:
    """Build every candidate allocation for each demand scenario and export to Excel.

    Reads per-site data from INPUT_XLSX (sheet "sites"), builds seven allocation
    variants per scenario in RHO_COLS, then writes per-site allocations and
    service levels plus a metrics summary to OUTPUT_XLSX.
    """
    sites = pd.read_excel(INPUT_XLSX, sheet_name="sites")
    mu = sites["mu_clients_per_visit"].to_numpy(float)
    visits_2019 = sites["visits_2019"].to_numpy(int)
    results: list[AllocationResult] = []
    metrics_rows: list[dict[str, object]] = []
    for scenario, dcol in RHO_COLS:
        # D_i: this scenario's neighbor-demand proxy column.
        d = sites[dcol].to_numpy(float)
        # Baseline: scaled 2019 to sum to N_TOTAL_2021
        # (loop-invariant w.r.t. scenario; recomputed each pass for simplicity).
        k_2019_scaled = largest_remainder_allocation(
            weights=visits_2019.astype(float), total=N_TOTAL_2021, k_min=K_MIN
        )
        results.append(AllocationResult(method="baseline_2019_scaled", scenario=scenario, k=k_2019_scaled, t_star=None))
        # Max-min baseline k achieving best possible min service level under sum constraint.
        t_star, k_base = max_min_service_level(d=d, mu=mu, n_total=N_TOTAL_2021, k_min=K_MIN)
        remaining = N_TOTAL_2021 - int(k_base.sum())
        # Two ways to spend the leftover visits on top of the max-min base.
        k_fair = distribute_remaining_fair(k_base, mu=mu, d=d, remaining=remaining)
        results.append(AllocationResult(method="fairness_waterfill", scenario=scenario, k=k_fair, t_star=t_star))
        k_eff = distribute_remaining_efficient(k_base, mu=mu, remaining=remaining)
        results.append(AllocationResult(method="efficiency_post_fair", scenario=scenario, k=k_eff, t_star=t_star))
        # Proportional to surrounding demand proxy D (uses "surrounding communities demand" directly)
        k_D = largest_remainder_allocation(weights=d, total=N_TOTAL_2021, k_min=K_MIN)
        results.append(AllocationResult(method="proportional_D", scenario=scenario, k=k_D, t_star=None))
        # Simple proportional fairness: k proportional to D/mu gives near-constant s in continuous relaxation.
        k_prop = largest_remainder_allocation(weights=d / mu, total=N_TOTAL_2021, k_min=K_MIN)
        results.append(AllocationResult(method="proportional_D_over_mu", scenario=scenario, k=k_prop, t_star=None))
        # Pure efficiency: k proportional to mu (with k_min)
        k_mu = largest_remainder_allocation(weights=mu, total=N_TOTAL_2021, k_min=K_MIN)
        results.append(AllocationResult(method="proportional_mu", scenario=scenario, k=k_mu, t_star=None))
        # Efficiency-maximizing subject to "no worse fairness than baseline_2019_scaled" (auditable cap)
        gini_cap = compute_metrics(k_2019_scaled, mu=mu, d=d)["service_level_gini"]
        k_mu_capped = improve_efficiency_under_gini_cap(
            k_start=k_mu, mu=mu, d=d, n_total=N_TOTAL_2021, k_min=K_MIN, gini_cap=float(gini_cap)
        )
        results.append(AllocationResult(method="proportional_mu_gini_capped_to_2019", scenario=scenario, k=k_mu_capped, t_star=None))
    # Assemble per-site output
    out_sites = sites[["site_id", "site_name", "lat", "lon", "visits_2019", "mu_clients_per_visit", "sd_clients_per_visit"]].copy()
    for res in results:
        col_k = f"k_{res.method}_{res.scenario}"
        out_sites[col_k] = res.k
        # Recover this result's scenario demand column to compute service levels.
        d = sites[[c for s, c in RHO_COLS if s == res.scenario][0]].to_numpy(float)
        s_vals = (res.k * mu) / d
        out_sites[f"s_{res.method}_{res.scenario}"] = s_vals
        m = compute_metrics(res.k, mu=mu, d=d)
        row = {"scenario": res.scenario, "method": res.method, "t_star": res.t_star}
        row.update(m)
        metrics_rows.append(row)
    metrics_df = pd.DataFrame(metrics_rows).sort_values(["scenario", "method"]).reset_index(drop=True)
    # Two sheets: per-site k/s columns, and one metrics row per (scenario, method).
    with pd.ExcelWriter(OUTPUT_XLSX, engine="openpyxl") as writer:
        out_sites.to_excel(writer, index=False, sheet_name="allocations")
        metrics_df.to_excel(writer, index=False, sheet_name="metrics")
# Script entry point: run the full allocation pipeline when executed directly.
if __name__ == "__main__":
    main()