"""Allocate the 2021 visit budget across sites under fairness/efficiency criteria.

Reads per-site demand proxies from INPUT_XLSX and writes allocations and
equity metrics to OUTPUT_XLSX.
"""
from __future__ import annotations
|
|
|
|
import math
|
|
from dataclasses import dataclass
|
|
|
|
import numpy as np
|
|
import pandas as pd
|
|
|
|
|
|
# Input/output workbook paths (relative to the working directory).
INPUT_XLSX = "task1/02_neighbor.xlsx"
OUTPUT_XLSX = "task1/03_allocate.xlsx"

N_TOTAL_2021 = 730  # scenario B: 2 sites/day * 365 days
K_MIN = 1  # minimum number of visits guaranteed to every site

# (scenario key, demand-proxy column name) pairs. Presumably each "rho"
# column is the demand proxy D with neighboring-community demand blended in
# at the given radius — confirm against the 02_neighbor workbook.
RHO_COLS = [
    ("self", "neighbor_demand_mu_self"),
    ("rho10", "neighbor_demand_mu_rho_10mi"),
    ("rho20", "neighbor_demand_mu_rho_20mi"),
    ("rho30", "neighbor_demand_mu_rho_30mi"),
]
|
|
|
|
|
|
def gini(x: np.ndarray) -> float:
    """Return the Gini coefficient of a nonnegative 1-D array.

    Uses the sorted-rank identity
        sum_ij |x_i - x_j| = 2 * sum_i (2i - n - 1) * x_(i)   (1-based rank i)
    which is mathematically equivalent to the mean-absolute-difference
    definition but runs in O(n log n) instead of building the O(n^2)
    pairwise-difference matrix.

    Args:
        x: Nonnegative values.

    Returns:
        Gini coefficient; 0.0 when all values are (close to) zero.

    Raises:
        ValueError: If any value is negative.
    """
    x = np.asarray(x, dtype=float)
    if np.any(x < 0):
        raise ValueError("Gini expects nonnegative values.")
    total = x.sum()
    if np.allclose(total, 0.0):
        return 0.0
    n = len(x)
    xs = np.sort(x)
    ranks = np.arange(1, n + 1)
    diff_sum = 2.0 * np.sum((2.0 * ranks - n - 1.0) * xs)
    # Same normalization as the pairwise form: diff_sum / (2 * n^2 * mean).
    return float(diff_sum / (2.0 * n * n * (total / n)))
|
|
|
|
|
|
def largest_remainder_allocation(weights: np.ndarray, total: int, k_min: int) -> np.ndarray:
    """Allocate `total` integer units proportionally to `weights`.

    Every entry receives at least `k_min`; the remainder is split with the
    largest-remainder (Hamilton) method so the result sums exactly to `total`.

    Args:
        weights: Nonnegative proportionality weights.
        total: Exact number of units to distribute.
        k_min: Minimum units per entry.

    Returns:
        Integer allocation summing to `total`, each entry >= `k_min`.

    Raises:
        ValueError: If `total < k_min * len(weights)` or any weight is negative.
        RuntimeError: If the final sanity checks fail (should not happen).
    """
    if total < k_min * len(weights):
        raise ValueError("total too small for k_min constraint.")
    w = np.asarray(weights, dtype=float)
    if np.any(w < 0):
        raise ValueError("weights must be nonnegative.")
    if np.allclose(w.sum(), 0.0):
        # All-zero weights: fall back to an equal split.
        base = np.full(len(w), total // len(w), dtype=int)
        base[: total - base.sum()] += 1
        # total >= k_min * len(w) guarantees total // len(w) >= k_min, so the
        # maximum below never changes the sum.
        return np.maximum(base, k_min)

    # Give everyone k_min first, then split the remainder proportionally.
    remaining = total - k_min * len(w)
    raw = remaining * (w / w.sum())
    flo = np.floor(raw).astype(int)
    k = flo + k_min

    need = total - k.sum()
    if need > 0:
        # Top up the entries with the largest fractional parts.
        frac = raw - flo
        order = np.argsort(-frac, kind="stable")
        k[order[:need]] += 1
    elif need < 0:
        # Defensive only: floor(raw).sum() <= remaining, so need >= 0 in
        # exact arithmetic; this guards against numerical surprises.
        # (The original also re-checked k.sum() here, duplicating the check
        # below — the duplicate has been removed.)
        frac = raw - flo
        order = np.argsort(frac, kind="stable")
        to_remove = -need
        for idx in order:
            if to_remove == 0:
                break
            if k[idx] > k_min:
                k[idx] -= 1
                to_remove -= 1

    if k.sum() != total:
        raise RuntimeError("Allocation did not match total.")
    if (k < k_min).any():
        raise RuntimeError("Allocation violated k_min.")
    return k
|
|
|
|
|
|
def feasibility_sum_for_t(t: float, d: np.ndarray, mu: np.ndarray, k_min: int) -> int:
|
|
# Constraints: k_i >= max(k_min, ceil(t * D_i / mu_i))
|
|
if t < 0:
|
|
return math.inf
|
|
req = np.ceil((t * d) / mu).astype(int)
|
|
req = np.maximum(req, k_min)
|
|
return int(req.sum())
|
|
|
|
|
|
def max_min_service_level(d: np.ndarray, mu: np.ndarray, n_total: int, k_min: int) -> tuple[float, np.ndarray]:
    """Maximize the minimum service level s_i = k_i * mu_i / d_i.

    Binary-searches the largest t for which the per-site requirements
    k_i = max(k_min, ceil(t * d_i / mu_i)) still fit within n_total, then
    returns that t together with the corresponding base allocation.

    Raises:
        ValueError: If any mu or d entry is non-positive.
        RuntimeError: If no feasible allocation can be built at t* even
            after backing off for floating-point slack.
    """
    if (mu <= 0).any():
        raise ValueError("mu must be positive to define service level.")
    if (d <= 0).any():
        raise ValueError("neighbor demand proxy D must be positive.")

    # Even if one site received every visit, its service level would be
    # n_total * mu_i / d_i, so the minimum over sites bounds t from above.
    high = float(np.min((n_total * mu) / d))
    low = 0.0

    # 60 halvings shrink the bracket far below float precision.
    for _ in range(60):
        mid = (low + high) / 2.0
        if feasibility_sum_for_t(mid, d=d, mu=mu, k_min=k_min) <= n_total:
            low = mid
        else:
            high = mid

    t_star = low
    k_base = np.maximum(np.ceil((t_star * d) / mu).astype(int), k_min)

    if k_base.sum() > n_total:
        # Numerical edge case: back off slightly to restore feasibility.
        t_star *= 0.999
        k_base = np.maximum(np.ceil((t_star * d) / mu).astype(int), k_min)
        if k_base.sum() > n_total:
            raise RuntimeError("Failed to construct feasible k at t*.")

    return t_star, k_base
|
|
|
|
|
|
def distribute_remaining_fair(k: np.ndarray, mu: np.ndarray, d: np.ndarray, remaining: int) -> np.ndarray:
    """Water-fill `remaining` extra visits onto the worst-served site.

    Each step recomputes s_i = k_i * mu_i / d_i and hands one visit to the
    site with the smallest s_i (ties broken by lowest index, per argmin).
    Returns a new array; `k` is not modified.
    """
    result = k.copy()
    step = 0
    while step < remaining:
        levels = (result * mu) / d
        worst = int(np.argmin(levels))
        result[worst] += 1
        step += 1
    return result
|
|
|
|
|
|
def distribute_remaining_efficient(k: np.ndarray, mu: np.ndarray, remaining: int) -> np.ndarray:
    """Distribute `remaining` extra visits round-robin in descending-mu order.

    NOTE(review): the original comment claimed this "maximizes sum k_i*mu_i",
    but a round-robin does not — true maximization would assign every extra
    visit to the single highest-mu site. This cycle instead favors high-mu
    sites first while still spreading the visits; the behavior is preserved
    as-is and only the misleading documentation is corrected.

    Args:
        k: Current integer allocation (not modified).
        mu: Expected clients per visit per site.
        remaining: Number of extra visits to hand out.

    Returns:
        A copy of `k` with the extra visits added.
    """
    k_out = k.copy()
    order = np.argsort(-mu, kind="stable")
    for t in range(remaining):
        k_out[order[t % len(order)]] += 1
    return k_out
|
|
|
|
|
|
@dataclass(frozen=True)
class AllocationResult:
    # One allocation produced by a single method under a single rho scenario.
    # NOTE(review): `k` is an ndarray, so the dataclass-generated __eq__
    # would return an array, not a bool — avoid comparing instances directly.
    method: str  # allocation method identifier, e.g. "fairness_waterfill"
    scenario: str  # rho scenario key from RHO_COLS, e.g. "rho10"
    k: np.ndarray  # integer visits allocated per site
    t_star: float | None  # max-min service level when applicable, else None
|
|
|
|
|
|
def compute_metrics(k: np.ndarray, mu: np.ndarray, d: np.ndarray) -> dict[str, float]:
    """Summarize an allocation: totals plus service-level equity statistics.

    Service level per site is s_i = k_i * mu_i / d_i. Returns plain floats
    so the dict serializes cleanly into a DataFrame/Excel row.
    """
    service = (k * mu) / d
    mean_s = service.mean()

    metrics: dict[str, float] = {}
    metrics["k_sum"] = float(k.sum())
    metrics["total_expected_clients"] = float(np.dot(k, mu))
    metrics["service_level_min"] = float(service.min())
    metrics["service_level_mean"] = float(mean_s)
    metrics["service_level_gini"] = float(gini(service))
    # Tiny epsilon keeps the coefficient of variation finite when mean is 0.
    metrics["service_level_cv"] = float(service.std(ddof=0) / (mean_s + 1e-12))
    metrics["k_gini"] = float(gini(k.astype(float)))
    return metrics
|
|
|
|
|
|
def improve_efficiency_under_gini_cap(
    *,
    k_start: np.ndarray,
    mu: np.ndarray,
    d: np.ndarray,
    n_total: int,
    k_min: int,
    gini_cap: float,
    max_iters: int = 20000,
) -> np.ndarray:
    """Locally repair an allocation until its service-level Gini meets a cap.

    Greedy local search: while gini(s) > gini_cap, move one visit from a
    well-served site ("donor") to a poorly-served one ("receiver"), choosing
    the move that cuts the Gini the most per unit of expected-client loss.
    The sum constraint (n_total) and per-site floor (k_min) are preserved by
    construction and re-verified at the end.

    Args:
        k_start: Feasible starting allocation (sums to n_total, all >= k_min).
        mu: Expected clients per visit per site.
        d: Demand proxy per site.
        n_total: Required allocation total.
        k_min: Per-site minimum.
        gini_cap: Target upper bound for gini(k * mu / d).
        max_iters: Safety limit on single-visit moves.

    Returns:
        An allocation with gini <= gini_cap, or the best found when no
        improving local move remains / max_iters is exhausted.

    Raises:
        ValueError: If k_start violates the sum or k_min constraints.
        RuntimeError: If the result violates constraints (defensive check).
    """
    if k_start.sum() != n_total:
        raise ValueError("k_start must sum to n_total.")
    if (k_start < k_min).any():
        raise ValueError("k_start violates k_min.")

    k = k_start.copy()
    s = (k * mu) / d
    g = gini(s)
    if g <= gini_cap:
        # Already fair enough: nothing to do.
        return k

    for _ in range(max_iters):
        s = (k * mu) / d
        g = gini(s)
        if g <= gini_cap:
            break

        # Only consider the 10 best- and 10 worst-served sites as move
        # endpoints; stable sort keeps tie-breaking deterministic.
        donor_candidates = np.argsort(-s, kind="stable")[:10]
        receiver_candidates = np.argsort(s, kind="stable")[:10]

        best_move = None
        best_score = None

        for donor in donor_candidates:
            if k[donor] <= k_min:
                # Donor is at the floor; cannot give up a visit.
                continue
            for receiver in receiver_candidates:
                if donor == receiver:
                    continue

                # Trial move: shift one visit donor -> receiver.
                k2 = k.copy()
                k2[donor] -= 1
                k2[receiver] += 1
                s2 = (k2 * mu) / d
                g2 = gini(s2)
                if g2 >= g:
                    # Only fairness-improving moves are admissible.
                    continue

                # Expected clients lost by the move (can be negative).
                eff_loss = mu[donor] - mu[receiver]
                if eff_loss < 0:
                    # This move increases effectiveness and improves fairness; prefer strongly.
                    score = (g - g2) * 1e9 + (-eff_loss)
                else:
                    # Fairness gained per unit of efficiency sacrificed.
                    score = (g - g2) / (eff_loss + 1e-9)

                if best_score is None or score > best_score:
                    best_score = score
                    best_move = (donor, receiver, k2)

        if best_move is None:
            # No improving local swap found; stop.
            break
        k = best_move[2]

    if k.sum() != n_total or (k < k_min).any():
        raise RuntimeError("Post-optimization allocation violated constraints.")
    return k
|
|
|
|
|
|
def main() -> None:
    """Run every allocation method for every rho scenario and write results.

    Reads the "sites" sheet from INPUT_XLSX, computes seven allocations per
    scenario (all summing to N_TOTAL_2021 with at least K_MIN visits per
    site), and writes per-site allocations plus summary metrics to
    OUTPUT_XLSX (sheets "allocations" and "metrics").
    """
    sites = pd.read_excel(INPUT_XLSX, sheet_name="sites")
    mu = sites["mu_clients_per_visit"].to_numpy(float)
    visits_2019 = sites["visits_2019"].to_numpy(int)

    results: list[AllocationResult] = []
    metrics_rows: list[dict[str, object]] = []

    for scenario, dcol in RHO_COLS:
        d = sites[dcol].to_numpy(float)

        # Baseline: scaled 2019 to sum to N_TOTAL_2021
        # NOTE(review): identical across scenarios (depends only on
        # visits_2019) but recomputed per scenario so each scenario gets its
        # own labeled baseline columns and metrics row.
        k_2019_scaled = largest_remainder_allocation(
            weights=visits_2019.astype(float), total=N_TOTAL_2021, k_min=K_MIN
        )
        results.append(AllocationResult(method="baseline_2019_scaled", scenario=scenario, k=k_2019_scaled, t_star=None))

        # Max-min baseline k achieving best possible min service level under sum constraint.
        t_star, k_base = max_min_service_level(d=d, mu=mu, n_total=N_TOTAL_2021, k_min=K_MIN)
        remaining = N_TOTAL_2021 - int(k_base.sum())

        # Spend leftover visits on the currently worst-served sites.
        k_fair = distribute_remaining_fair(k_base, mu=mu, d=d, remaining=remaining)
        results.append(AllocationResult(method="fairness_waterfill", scenario=scenario, k=k_fair, t_star=t_star))

        # Spend leftover visits round-robin, high-mu sites first.
        k_eff = distribute_remaining_efficient(k_base, mu=mu, remaining=remaining)
        results.append(AllocationResult(method="efficiency_post_fair", scenario=scenario, k=k_eff, t_star=t_star))

        # Proportional to surrounding demand proxy D (uses "surrounding communities demand" directly)
        k_D = largest_remainder_allocation(weights=d, total=N_TOTAL_2021, k_min=K_MIN)
        results.append(AllocationResult(method="proportional_D", scenario=scenario, k=k_D, t_star=None))

        # Simple proportional fairness: k proportional to D/mu gives near-constant s in continuous relaxation.
        k_prop = largest_remainder_allocation(weights=d / mu, total=N_TOTAL_2021, k_min=K_MIN)
        results.append(AllocationResult(method="proportional_D_over_mu", scenario=scenario, k=k_prop, t_star=None))

        # Pure efficiency: k proportional to mu (with k_min)
        k_mu = largest_remainder_allocation(weights=mu, total=N_TOTAL_2021, k_min=K_MIN)
        results.append(AllocationResult(method="proportional_mu", scenario=scenario, k=k_mu, t_star=None))

        # Efficiency-maximizing subject to "no worse fairness than baseline_2019_scaled" (auditable cap)
        gini_cap = compute_metrics(k_2019_scaled, mu=mu, d=d)["service_level_gini"]
        k_mu_capped = improve_efficiency_under_gini_cap(
            k_start=k_mu, mu=mu, d=d, n_total=N_TOTAL_2021, k_min=K_MIN, gini_cap=float(gini_cap)
        )
        results.append(AllocationResult(method="proportional_mu_gini_capped_to_2019", scenario=scenario, k=k_mu_capped, t_star=None))

    # Assemble per-site output
    out_sites = sites[["site_id", "site_name", "lat", "lon", "visits_2019", "mu_clients_per_visit", "sd_clients_per_visit"]].copy()

    for res in results:
        # One k_* and one s_* column per (method, scenario) pair.
        col_k = f"k_{res.method}_{res.scenario}"
        out_sites[col_k] = res.k

        # Re-derive this result's demand proxy from its scenario key.
        d = sites[[c for s, c in RHO_COLS if s == res.scenario][0]].to_numpy(float)
        s_vals = (res.k * mu) / d
        out_sites[f"s_{res.method}_{res.scenario}"] = s_vals

        m = compute_metrics(res.k, mu=mu, d=d)
        row = {"scenario": res.scenario, "method": res.method, "t_star": res.t_star}
        row.update(m)
        metrics_rows.append(row)

    metrics_df = pd.DataFrame(metrics_rows).sort_values(["scenario", "method"]).reset_index(drop=True)

    with pd.ExcelWriter(OUTPUT_XLSX, engine="openpyxl") as writer:
        out_sites.to_excel(writer, index=False, sheet_name="allocations")
        metrics_df.to_excel(writer, index=False, sheet_name="metrics")
|
|
|
|
|
|
# Script entry point: run the full allocation pipeline.
if __name__ == "__main__":
    main()
|