This commit is contained in:
2026-01-19 01:40:19 +08:00
commit 44b5b31825
9 changed files with 745 additions and 0 deletions

301
task1/03_allocate_k.py Normal file
View File

@@ -0,0 +1,301 @@
from __future__ import annotations
import math
from dataclasses import dataclass
import numpy as np
import pandas as pd
# Input workbook (presumably produced by the 02_neighbor step — verify pipeline order).
INPUT_XLSX = "task1/02_neighbor.xlsx"
# Output workbook written by this script (sheets: "allocations", "metrics").
OUTPUT_XLSX = "task1/03_allocate.xlsx"
N_TOTAL_2021 = 730 # scenario B: 2 sites/day * 365 days
# Minimum number of visits every site must receive.
K_MIN = 1
# (scenario key, input column holding that scenario's neighbor-demand proxy D_i).
RHO_COLS = [
    ("self", "neighbor_demand_mu_self"),
    ("rho10", "neighbor_demand_mu_rho_10mi"),
    ("rho20", "neighbor_demand_mu_rho_20mi"),
    ("rho30", "neighbor_demand_mu_rho_30mi"),
]
def gini(x: np.ndarray) -> float:
    """Gini coefficient of a nonnegative vector via the mean-absolute-difference formula.

    Returns 0.0 for an all-zero vector; raises ValueError on negative entries.
    O(n^2) in memory/time (full pairwise difference matrix).
    """
    values = np.asarray(x, dtype=float)
    if (values < 0).any():
        raise ValueError("Gini expects nonnegative values.")
    total = values.sum()
    if np.allclose(total, 0.0):
        return 0.0
    n = values.size
    # Sum of |v_i - v_j| over all ordered pairs.
    pairwise = np.abs(values[:, None] - values[None, :]).sum()
    return pairwise / (2.0 * n * n * values.mean())
def largest_remainder_allocation(weights: np.ndarray, total: int, k_min: int) -> np.ndarray:
    """Allocate `total` integer units proportionally to `weights`, flooring at `k_min` each.

    Uses the largest-remainder (Hamilton) method on the units left after the
    k_min floor is granted to every entry.

    Args:
        weights: nonnegative proportionality weights, one per entry.
        total: exact number of units to distribute.
        k_min: per-entry minimum.

    Returns:
        Integer array summing exactly to `total` with every entry >= k_min.

    Raises:
        ValueError: if total < k_min * len(weights) or any weight is negative.
        RuntimeError: if the rounding adjustment cannot reach the exact total.
    """
    w = np.asarray(weights, dtype=float)
    if total < k_min * len(w):
        raise ValueError("total too small for k_min constraint.")
    if np.any(w < 0):
        raise ValueError("weights must be nonnegative.")
    if np.allclose(w.sum(), 0.0):
        # Degenerate case (no signal in the weights): purely equal split.
        base = np.full(len(w), total // len(w), dtype=int)
        base[: total - base.sum()] += 1
        # total >= k_min * len(w) already guarantees base >= k_min; maximum is a safety net.
        return np.maximum(base, k_min)
    remaining = total - k_min * len(w)
    raw = remaining * (w / w.sum())
    flo = np.floor(raw).astype(int)
    k = flo + k_min
    need = total - k.sum()
    frac = raw - flo
    if need > 0:
        # Hand leftover units to the largest fractional remainders (stable tie-break).
        order = np.argsort(-frac, kind="stable")
        k[order[:need]] += 1
    elif need < 0:
        # Over-allocation can only arise from numerical noise: trim from the
        # smallest remainders while respecting the k_min floor.
        order = np.argsort(frac, kind="stable")
        to_remove = -need
        for idx in order:
            if to_remove == 0:
                break
            if k[idx] > k_min:
                k[idx] -= 1
                to_remove -= 1
    # Single consolidated validation (the original duplicated the total check).
    if k.sum() != total:
        raise RuntimeError("Allocation did not match total.")
    if (k < k_min).any():
        raise RuntimeError("Allocation violated k_min.")
    return k
def feasibility_sum_for_t(t: float, d: np.ndarray, mu: np.ndarray, k_min: int) -> int:
# Constraints: k_i >= max(k_min, ceil(t * D_i / mu_i))
if t < 0:
return math.inf
req = np.ceil((t * d) / mu).astype(int)
req = np.maximum(req, k_min)
return int(req.sum())
def max_min_service_level(d: np.ndarray, mu: np.ndarray, n_total: int, k_min: int) -> tuple[float, np.ndarray]:
    """Binary-search the largest achievable minimum service level t* and a base allocation.

    Service level of site i is s_i = k_i * mu_i / D_i. Returns (t*, k_base) where
    k_base is the cheapest allocation achieving s_i >= t* at every site, with
    k_base.sum() <= n_total and k_base >= k_min elementwise.
    """
    if (mu <= 0).any():
        raise ValueError("mu must be positive to define service level.")
    if (d <= 0).any():
        raise ValueError("neighbor demand proxy D must be positive.")

    def visits_needed(level: float) -> int:
        # Inlined feasibility check: k_i >= max(k_min, ceil(level * D_i / mu_i)).
        required = np.maximum(np.ceil((level * d) / mu).astype(int), k_min)
        return int(required.sum())

    # Upper bound: even giving all visits to the worst site cannot push its
    # level past n_total * mu_i / D_i, so t* <= min over sites.
    hi = float(np.min((n_total * mu) / d))
    lo = 0.0
    # 60 bisection steps — far below double precision on any realistic range.
    for _ in range(60):
        mid = (lo + hi) / 2.0
        if visits_needed(mid) <= n_total:
            lo = mid
        else:
            hi = mid
    t_star = lo
    k_base = np.maximum(np.ceil((t_star * d) / mu).astype(int), k_min)
    if k_base.sum() > n_total:
        # Numerical edge case: back off slightly to restore feasibility.
        t_star *= 0.999
        k_base = np.maximum(np.ceil((t_star * d) / mu).astype(int), k_min)
        if k_base.sum() > n_total:
            raise RuntimeError("Failed to construct feasible k at t*.")
    return t_star, k_base
def distribute_remaining_fair(k: np.ndarray, mu: np.ndarray, d: np.ndarray, remaining: int) -> np.ndarray:
    """Water-fill `remaining` extra visits one at a time onto the worst-served site.

    At each step the site with the smallest current s_i = k_i * mu_i / D_i gets
    one more visit (ties broken by lowest index via argmin). Input is not mutated.
    """
    result = k.copy()
    for _ in range(remaining):
        levels = (result * mu) / d
        worst = int(np.argmin(levels))
        result[worst] += 1
    return result
def distribute_remaining_efficient(k: np.ndarray, mu: np.ndarray, remaining: int) -> np.ndarray:
    """Spread `remaining` extra visits round-robin over sites in descending-mu order.

    NOTE(review): this is a round-robin starting from the highest-mu sites, not a
    strict give-everything-to-argmax(mu) rule; when remaining < n_sites the extras
    land only on the highest-mu sites. Input is not mutated.
    """
    result = k.copy()
    by_mu_desc = np.argsort(-mu, kind="stable")
    n = len(by_mu_desc)
    for step in range(remaining):
        result[by_mu_desc[step % n]] += 1
    return result
@dataclass(frozen=True)
class AllocationResult:
    """One allocation outcome: per-site visit counts for a (method, scenario) pair.

    NOTE(review): `k` is an np.ndarray, so the dataclass-generated __eq__/__hash__
    do not behave like value comparison (ndarray __eq__ is elementwise and ndarrays
    are unhashable). This file only stores and reads fields, never compares/hashes
    instances — confirm before relying on equality elsewhere.
    """
    # Allocation strategy label, e.g. "fairness_waterfill".
    method: str
    # Scenario key from RHO_COLS, e.g. "rho10".
    scenario: str
    # Integer visit counts per site.
    k: np.ndarray
    # Max-min service level t* when produced by the binary search; None otherwise.
    t_star: float | None
def compute_metrics(k: np.ndarray, mu: np.ndarray, d: np.ndarray) -> dict[str, float]:
    """Summary statistics for an allocation: totals, service levels, inequality.

    Service level per site is s_i = k_i * mu_i / D_i; Gini and CV measure how
    unevenly that level is spread. The 1e-12 guard avoids division by zero in CV.
    """
    levels = (k * mu) / d
    mean_level = levels.mean()
    return {
        "k_sum": float(k.sum()),
        "total_expected_clients": float(np.dot(k, mu)),
        "service_level_min": float(levels.min()),
        "service_level_mean": float(mean_level),
        "service_level_gini": float(gini(levels)),
        "service_level_cv": float(levels.std(ddof=0) / (mean_level + 1e-12)),
        "k_gini": float(gini(k.astype(float))),
    }
def improve_efficiency_under_gini_cap(
    *,
    k_start: np.ndarray,
    mu: np.ndarray,
    d: np.ndarray,
    n_total: int,
    k_min: int,
    gini_cap: float,
    max_iters: int = 20000,
) -> np.ndarray:
    """Greedy local search: rebalance single visits until service-level Gini <= gini_cap.

    Starting from `k_start` (must sum to n_total and respect k_min), repeatedly
    moves one visit from a well-served site (high s_i = k_i*mu_i/d_i) to a poorly
    served one, picking the candidate move that buys the most Gini reduction per
    unit of effectiveness (sum k_i*mu_i) sacrificed. May return with Gini still
    above the cap if no improving local move exists or max_iters is exhausted.

    Raises:
        ValueError: if k_start violates the total or the k_min floor.
        RuntimeError: if the final allocation violates the constraints.
    """
    if k_start.sum() != n_total:
        raise ValueError("k_start must sum to n_total.")
    if (k_start < k_min).any():
        raise ValueError("k_start violates k_min.")
    k = k_start.copy()
    s = (k * mu) / d
    g = gini(s)
    if g <= gini_cap:
        # Already within the fairness cap; nothing to do.
        return k
    for _ in range(max_iters):
        s = (k * mu) / d
        g = gini(s)
        if g <= gini_cap:
            break
        # Restrict to the 10 best-served donors and 10 worst-served receivers
        # to keep each iteration's candidate scan cheap.
        donor_candidates = np.argsort(-s, kind="stable")[:10]
        receiver_candidates = np.argsort(s, kind="stable")[:10]
        best_move = None
        best_score = None
        for donor in donor_candidates:
            if k[donor] <= k_min:
                # Donor cannot drop below the floor.
                continue
            for receiver in receiver_candidates:
                if donor == receiver:
                    continue
                # Tentatively move one visit donor -> receiver and re-score.
                k2 = k.copy()
                k2[donor] -= 1
                k2[receiver] += 1
                s2 = (k2 * mu) / d
                g2 = gini(s2)
                if g2 >= g:
                    # Keep only strictly fairness-improving moves.
                    continue
                eff_loss = mu[donor] - mu[receiver]
                if eff_loss < 0:
                    # This move increases effectiveness and improves fairness; prefer strongly.
                    score = (g - g2) * 1e9 + (-eff_loss)
                else:
                    # Fairness gained per unit of effectiveness sacrificed.
                    score = (g - g2) / (eff_loss + 1e-9)
                if best_score is None or score > best_score:
                    best_score = score
                    best_move = (donor, receiver, k2)
        if best_move is None:
            # No improving local swap found; stop.
            break
        k = best_move[2]
    # Final sanity check: moves are sum-preserving and floor-respecting by
    # construction, so a failure here indicates a logic error.
    if k.sum() != n_total or (k < k_min).any():
        raise RuntimeError("Post-optimization allocation violated constraints.")
    return k
def main() -> None:
    """Build every candidate allocation for each demand scenario and export to Excel.

    Reads per-site data from INPUT_XLSX (sheet "sites"), builds seven allocation
    variants per scenario in RHO_COLS, then writes per-site allocations and
    service levels plus a metrics summary to OUTPUT_XLSX.
    """
    sites = pd.read_excel(INPUT_XLSX, sheet_name="sites")
    mu = sites["mu_clients_per_visit"].to_numpy(float)
    visits_2019 = sites["visits_2019"].to_numpy(int)
    results: list[AllocationResult] = []
    metrics_rows: list[dict[str, object]] = []
    for scenario, dcol in RHO_COLS:
        # D_i: this scenario's neighbor-demand proxy column.
        d = sites[dcol].to_numpy(float)
        # Baseline: scaled 2019 to sum to N_TOTAL_2021
        # (loop-invariant w.r.t. scenario; recomputed each pass for simplicity).
        k_2019_scaled = largest_remainder_allocation(
            weights=visits_2019.astype(float), total=N_TOTAL_2021, k_min=K_MIN
        )
        results.append(AllocationResult(method="baseline_2019_scaled", scenario=scenario, k=k_2019_scaled, t_star=None))
        # Max-min baseline k achieving best possible min service level under sum constraint.
        t_star, k_base = max_min_service_level(d=d, mu=mu, n_total=N_TOTAL_2021, k_min=K_MIN)
        remaining = N_TOTAL_2021 - int(k_base.sum())
        # Two ways to spend the leftover visits on top of the max-min base.
        k_fair = distribute_remaining_fair(k_base, mu=mu, d=d, remaining=remaining)
        results.append(AllocationResult(method="fairness_waterfill", scenario=scenario, k=k_fair, t_star=t_star))
        k_eff = distribute_remaining_efficient(k_base, mu=mu, remaining=remaining)
        results.append(AllocationResult(method="efficiency_post_fair", scenario=scenario, k=k_eff, t_star=t_star))
        # Proportional to surrounding demand proxy D (uses "surrounding communities demand" directly)
        k_D = largest_remainder_allocation(weights=d, total=N_TOTAL_2021, k_min=K_MIN)
        results.append(AllocationResult(method="proportional_D", scenario=scenario, k=k_D, t_star=None))
        # Simple proportional fairness: k proportional to D/mu gives near-constant s in continuous relaxation.
        k_prop = largest_remainder_allocation(weights=d / mu, total=N_TOTAL_2021, k_min=K_MIN)
        results.append(AllocationResult(method="proportional_D_over_mu", scenario=scenario, k=k_prop, t_star=None))
        # Pure efficiency: k proportional to mu (with k_min)
        k_mu = largest_remainder_allocation(weights=mu, total=N_TOTAL_2021, k_min=K_MIN)
        results.append(AllocationResult(method="proportional_mu", scenario=scenario, k=k_mu, t_star=None))
        # Efficiency-maximizing subject to "no worse fairness than baseline_2019_scaled" (auditable cap)
        gini_cap = compute_metrics(k_2019_scaled, mu=mu, d=d)["service_level_gini"]
        k_mu_capped = improve_efficiency_under_gini_cap(
            k_start=k_mu, mu=mu, d=d, n_total=N_TOTAL_2021, k_min=K_MIN, gini_cap=float(gini_cap)
        )
        results.append(AllocationResult(method="proportional_mu_gini_capped_to_2019", scenario=scenario, k=k_mu_capped, t_star=None))
    # Assemble per-site output
    out_sites = sites[["site_id", "site_name", "lat", "lon", "visits_2019", "mu_clients_per_visit", "sd_clients_per_visit"]].copy()
    for res in results:
        col_k = f"k_{res.method}_{res.scenario}"
        out_sites[col_k] = res.k
        # Recover this result's scenario demand column to compute service levels.
        d = sites[[c for s, c in RHO_COLS if s == res.scenario][0]].to_numpy(float)
        s_vals = (res.k * mu) / d
        out_sites[f"s_{res.method}_{res.scenario}"] = s_vals
        m = compute_metrics(res.k, mu=mu, d=d)
        row = {"scenario": res.scenario, "method": res.method, "t_star": res.t_star}
        row.update(m)
        metrics_rows.append(row)
    metrics_df = pd.DataFrame(metrics_rows).sort_values(["scenario", "method"]).reset_index(drop=True)
    # Two sheets: per-site k/s columns, and one metrics row per (scenario, method).
    with pd.ExcelWriter(OUTPUT_XLSX, engine="openpyxl") as writer:
        out_sites.to_excel(writer, index=False, sheet_name="allocations")
        metrics_df.to_excel(writer, index=False, sheet_name="metrics")
# Script entry point: run the full allocation pipeline when executed directly.
if __name__ == "__main__":
    main()