529 lines
17 KiB
Python
529 lines
17 KiB
Python
|
|
"""
|
|||
|
|
Task 3: k-min allocation with ordered two-stop pairing.
|
|||
|
|
|
|||
|
|
- Uses 2019 data to allocate visit frequencies.
|
|||
|
|
- Pairs are drawn from ordered_pairs_allocation_k6_cap250.csv (ordered i->j).
|
|||
|
|
- Total annual trips (paired + single) are fixed to N_TARGET via fixed-point adjustment.
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
from __future__ import annotations
|
|||
|
|
|
|||
|
|
import argparse
|
|||
|
|
import os
|
|||
|
|
import math
|
|||
|
|
from typing import Dict, List, Tuple
|
|||
|
|
|
|||
|
|
import numpy as np
|
|||
|
|
import pandas as pd
|
|||
|
|
|
|||
|
|
# matplotlib is optional: when it is missing, plotting is skipped downstream
# (main() checks _HAS_MPL) while CSV outputs are still produced.
try:
    import matplotlib

    # Force the non-interactive Agg backend (headless/CI safe).  Must be
    # selected before pyplot is imported.
    matplotlib.use("Agg")
    import matplotlib.pyplot as plt

    _HAS_MPL = True
except ModuleNotFoundError:
    plt = None
    _HAS_MPL = False
|
|||
|
|
|
|||
|
|
|
|||
|
|
# Default input/output paths; all overridable via CLI flags in main().
INPUT_XLSX = "prob/MFP Regular Sites 2019.xlsx"
INPUT_PAIRS = "data/ordered_pairs_allocation_k6_cap250.csv"
OUTPUT_DIR = "data"

# Default model parameters (also CLI-overridable).
C_OPT = 250        # per-trip capacity cap; default for --capacity
N_TARGET = 730     # target total annual trips (paired + single); default for --n-target
ALPHA = 0.6        # penalty weight on the unmet-demand ratio in the site score
BETA = 0.2         # penalty weight on the wasted-capacity ratio in the site score
# NOTE(review): N_SIMS and RANDOM_SEED are not referenced anywhere in this
# file — presumably kept for parity with sibling scripts; confirm before use.
N_SIMS = 2000
RANDOM_SEED = 606
|
|||
|
|
|
|||
|
|
|
|||
|
|
def gini_coefficient(values: np.ndarray) -> float:
    """Return the Gini coefficient of the finite, non-negative part of *values*.

    Non-finite entries are discarded and negatives clipped to zero first.
    Returns 0.0 for empty input or when the clipped total is not positive.
    """
    data = np.asarray(values, dtype=float)
    data = data[np.isfinite(data)]
    if data.size == 0:
        return 0.0

    data = np.clip(data, 0, None)
    mass = data.sum()
    if mass <= 0:
        return 0.0

    # Rank-weighted formulation: G = 2*sum(i*x_i)/(n*sum(x)) - (n+1)/n
    # over the ascending-sorted values with 1-based ranks.
    ordered = np.sort(data)
    count = ordered.size
    ranks = np.arange(1, count + 1, dtype=float)
    weighted = (ranks * ordered).sum()
    return float(2.0 * weighted / (count * mass) - (count + 1.0) / count)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _norm_pdf(z):
    """Standard normal density evaluated elementwise at *z*."""
    coeff = 1.0 / np.sqrt(2.0 * np.pi)
    return coeff * np.exp(-0.5 * np.square(z))
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _norm_cdf(z):
    """Standard normal CDF evaluated elementwise at *z*.

    Uses math.erf (vectorized) so no scipy dependency is needed.
    """
    scaled = np.asarray(z, dtype=float) / np.sqrt(2.0)
    erf_vals = np.vectorize(math.erf, otypes=[float])(scaled)
    return 0.5 * (1.0 + erf_vals)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def expected_clipped_normal(mu, sigma, lower=0.0, upper=1.0):
    """Elementwise E[clip(X, lower, upper)] for X ~ Normal(mu, sigma).

    Closed form for a censored normal:
        E = lower * Phi(z_l) + E[X; lower < X <= upper] + upper * (1 - Phi(z_u))
    where E[X; l < X <= u] = mu * (Phi(z_u) - Phi(z_l)) - sigma * (phi(z_u) - phi(z_l)).
    Entries with sigma <= 0 degenerate to clip(mu, lower, upper).

    Improvement over the previous version: mu and sigma are broadcast
    against each other, so scalar/array mixes (e.g. one sigma for many mu)
    work instead of failing on shape mismatch.

    Raises ValueError when lower > upper.
    """
    mu = np.asarray(mu, dtype=float)
    sigma = np.asarray(sigma, dtype=float)
    lower = float(lower)
    upper = float(upper)

    if lower > upper:
        raise ValueError("lower must be <= upper")

    # Broadcast once so all arithmetic below works for any shape mix.
    mu, sigma = np.broadcast_arrays(mu, sigma)

    degenerate = sigma <= 0
    # Dummy sigma of 1.0 on degenerate lanes avoids divide-by-zero; those
    # lanes are overwritten by the final np.where.
    s = np.where(degenerate, 1.0, sigma)

    z_u = (upper - mu) / s
    z_l = (lower - mu) / s

    Phi_u = _norm_cdf(z_u)
    Phi_l = _norm_cdf(z_l)
    phi_u = _norm_pdf(z_u)
    phi_l = _norm_pdf(z_l)

    # Partial expectations E[X * 1{X <= t}] at t = upper and t = lower.
    ex_le_u = mu * Phi_u - s * phi_u
    ex_le_l = mu * Phi_l - s * phi_l

    clipped = lower * Phi_l + (ex_le_u - ex_le_l) + upper * (1.0 - Phi_u)
    return np.where(degenerate, np.clip(mu, lower, upper), clipped)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _find_col(df: pd.DataFrame, candidates: List[str]) -> str:
    """Return the first column of *df* matching any name in *candidates*.

    Exact matches win (in candidate order); otherwise a case-insensitive
    match is attempted.  Raises ValueError when nothing matches.
    """
    exact = [name for name in candidates if name in df.columns]
    if exact:
        return exact[0]

    by_lower = {col.lower(): col for col in df.columns}
    for name in candidates:
        match = by_lower.get(name.lower())
        if match is not None:
            return match

    raise ValueError(f"Missing required column. Tried: {candidates}")
|
|||
|
|
|
|||
|
|
|
|||
|
|
def load_sites(path: str) -> pd.DataFrame:
    """Read the 2019 regular-sites workbook into a normalized DataFrame.

    Columns returned: site_name, mu, sigma, visits_2019, site_idx (1-based),
    TotalDemand (= mu * visits_2019).  Column headers are matched
    case-insensitively with fallbacks (see _find_col).  sigma defaults to
    0.0 when missing; raises ValueError when any mu or visits_2019 is
    missing/non-numeric.
    """
    raw = pd.read_excel(path)

    # Resolve flexible workbook headers to canonical column names.
    resolved = {
        "site_name": _find_col(raw, ["Site Name", "site name", "site"]),
        "mu": _find_col(raw, ["Average Demand per Visit", "average demand per visit", "avg demand"]),
        "sigma": _find_col(raw, ["StDev(Demand per Visit)", "stdev(demand per visit)", "stdev", "std"]),
        "visits_2019": _find_col(raw, ["Number of Visits in 2019", "number of visits in 2019", "visits"]),
    }

    sites = raw[list(resolved.values())].copy()
    sites.columns = list(resolved.keys())
    sites["mu"] = pd.to_numeric(sites["mu"], errors="coerce")
    sites["sigma"] = pd.to_numeric(sites["sigma"], errors="coerce").fillna(0.0)
    sites["visits_2019"] = pd.to_numeric(sites["visits_2019"], errors="coerce")

    bad = sites[["mu", "visits_2019"]].isna().any(axis=1)
    if bad.any():
        raise ValueError(f"Missing mu/visits_2019 for {int(bad.sum())} rows.")

    sites = sites.reset_index(drop=True)
    sites["site_idx"] = np.arange(1, len(sites) + 1, dtype=int)
    sites["TotalDemand"] = sites["mu"] * sites["visits_2019"]
    return sites
|
|||
|
|
|
|||
|
|
|
|||
|
|
def allocate_visits(df: pd.DataFrame, k_min_real: float, n_total: int) -> np.ndarray | None:
    """Allocate *n_total* visits across sites with a (fractional) minimum per site.

    Sites are sorted ascending by TotalDemand; the lowest-demand sites get a
    base of floor(k_min_real) visits and the highest-demand sites
    ceil(k_min_real), so the average base approximates k_min_real.  Visits
    beyond the reserved base are distributed proportionally to TotalDemand,
    then rounded totals are nudged one visit at a time so the grand total
    equals n_total exactly.

    Bug fix vs. the previous version: when rounding overshoots, removals now
    skip sites already at their base allocation, so the k_min floor is never
    violated.

    Returns the per-site allocation aligned with df's site_idx order, or
    None when n_total cannot cover the reserved base visits.
    """
    df_sorted = df.sort_values("TotalDemand").reset_index(drop=False)
    n = len(df_sorted)

    k_floor = int(np.floor(k_min_real))
    k_ceil = int(np.ceil(k_min_real))
    frac = k_min_real - k_floor

    # Split sites so that the mean base allocation is ~k_min_real.
    n_ceil = int(round(n * frac))
    n_floor = n - n_ceil
    k_base = np.array([k_floor] * n_floor + [k_ceil] * n_ceil, dtype=int)

    n_reserved = int(k_base.sum())
    n_free = int(n_total - n_reserved)
    if n_free < 0:
        return None  # infeasible: the minimums alone exceed the budget

    weights = df_sorted["TotalDemand"] / df_sorted["TotalDemand"].sum()
    allocated = (k_base + n_free * weights.values).round().astype(int)
    allocated = np.maximum(allocated, k_base)

    # Rounding can leave the total off by a few visits.  Additions go to the
    # highest-weight sites first; removals come from the lowest-weight sites
    # first but never push a site below its k_base minimum.
    diff = int(n_total - allocated.sum())
    if diff > 0:
        order = weights.sort_values(ascending=False).index.tolist()
        for idx in order[:diff]:
            allocated[idx] += 1
    elif diff < 0:
        order = weights.sort_values(ascending=True).index.tolist()
        remaining = -diff
        # Multiple passes: one site may need to give up more than one visit.
        while remaining > 0:
            progressed = False
            for idx in order:
                if remaining == 0:
                    break
                if allocated[idx] > k_base[idx]:
                    allocated[idx] -= 1
                    remaining -= 1
                    progressed = True
            if not progressed:
                break  # no slack left above the floors; keep floors intact

    alloc_sorted = df_sorted[["site_idx"]].copy()
    alloc_sorted["AllocatedVisits"] = allocated
    return alloc_sorted.sort_values("site_idx")["AllocatedVisits"].to_numpy(dtype=int)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def assign_pairs(pairs_df: pd.DataFrame, visits: np.ndarray) -> Tuple[pd.DataFrame, np.ndarray]:
    """Greedily convert visit slots into two-stop paired trips.

    Pair rows are consumed in row order (callers pre-sort by desirability).
    Each ordered pair i->j absorbs min(remaining_i, remaining_j) trips from
    both sites' remaining budgets.  The site_*_idx columns are 1-based.

    Robustness fix vs. the previous version: iteration is positional, so a
    pairs_df with a non-RangeIndex (e.g. after filtering without
    reset_index) can no longer desync or overflow the pair_counts array.

    Returns (copy of pairs_df with a 'pair_count' column, per-site count of
    unpaired single visits).
    """
    remaining = visits.astype(int).copy()
    pair_counts = np.zeros(len(pairs_df), dtype=int)

    # Pull the index columns out once; iterate by position, not by label.
    i_col = pairs_df["site_i_idx"].to_numpy()
    j_col = pairs_df["site_j_idx"].to_numpy()
    for pos in range(len(pairs_df)):
        i = int(i_col[pos]) - 1
        j = int(j_col[pos]) - 1
        paired = int(min(remaining[i], remaining[j]))
        if paired <= 0:
            continue
        pair_counts[pos] = paired
        remaining[i] -= paired
        remaining[j] -= paired

    out = pairs_df.copy()
    out["pair_count"] = pair_counts
    return out, remaining
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _compute_metrics(
    sites: pd.DataFrame,
    visits: np.ndarray,
    pairs_with_counts: pd.DataFrame,
    singles: np.ndarray,
    *,
    alpha: float,
    beta: float,
    capacity: float,
) -> Dict[str, float]:
    """Score one allocation, per site and in aggregate.

    Per-site effectiveness is served/demand minus alpha * (unmet ratio) and
    beta * (waste ratio), clipped to [0, 1].  Each pair row contributes a
    first-stop capacity share q_opt to site i and (capacity - q_opt) to
    site j; single trips carry the full capacity.

    NOTE(review): *visits* is accepted but never read here — kept for
    signature compatibility with callers; confirm before removing.

    Returns scalar metrics plus per-site arrays (score_per_site,
    annual_eff, pair_first, pair_second).
    """
    n = len(sites)
    mu = sites["mu"].to_numpy(dtype=float)
    sigma = sites["sigma"].to_numpy(dtype=float)
    demand = sites["TotalDemand"].to_numpy(dtype=float)

    # Single-stop trips: expected families served per trip, capped at capacity.
    eff_single = expected_clipped_normal(mu, sigma, lower=0.0, upper=capacity)
    served_single = singles * eff_single
    cap_single = singles * capacity

    pair_first = np.zeros(n, dtype=float)
    pair_second = np.zeros(n, dtype=float)
    served_first = np.zeros(n, dtype=float)
    served_second = np.zeros(n, dtype=float)
    cap_first = np.zeros(n, dtype=float)
    cap_second = np.zeros(n, dtype=float)

    # Vectorized accumulation over active pair rows (replaces a per-row
    # iterrows loop); np.add.at sums correctly over repeated site indices.
    counts_all = pairs_with_counts["pair_count"].to_numpy(dtype=int)
    active = counts_all > 0
    if np.any(active):
        first_idx = pairs_with_counts["site_i_idx"].to_numpy(dtype=int)[active] - 1
        second_idx = pairs_with_counts["site_j_idx"].to_numpy(dtype=int)[active] - 1
        counts = counts_all[active].astype(float)
        q_opt = pairs_with_counts["q_opt"].to_numpy(dtype=float)[active]
        srv_i = pairs_with_counts["served_i_mean"].to_numpy(dtype=float)[active]
        srv_j = pairs_with_counts["served_j_mean"].to_numpy(dtype=float)[active]

        np.add.at(pair_first, first_idx, counts)
        np.add.at(pair_second, second_idx, counts)
        np.add.at(served_first, first_idx, counts * srv_i)
        np.add.at(served_second, second_idx, counts * srv_j)
        np.add.at(cap_first, first_idx, counts * q_opt)
        np.add.at(cap_second, second_idx, counts * (capacity - q_opt))

    annual_eff = served_single + served_first + served_second
    cap_total = cap_single + cap_first + cap_second

    # Ratios default to 0.0 where the denominator is non-positive.
    with np.errstate(divide="ignore", invalid="ignore"):
        base = np.where(demand > 0, annual_eff / demand, 0.0)
        unmet = np.where(demand > 0, np.maximum(0.0, demand - annual_eff) / demand, 0.0)
        waste = np.where(cap_total > 0, np.maximum(0.0, cap_total - annual_eff) / cap_total, 0.0)

    score = np.clip(base - alpha * unmet - beta * waste, 0.0, 1.0)
    bottom_n = max(1, int(np.ceil(n * 0.10)))  # size of the worst decile

    total_served = float(annual_eff.sum())
    total_demand = float(demand.sum())
    total_unmet = float(np.maximum(0.0, demand - annual_eff).sum())
    total_waste = float(np.maximum(0.0, cap_total - annual_eff).sum())

    return {
        "effectiveness": float(score.mean()),
        "min_eff": float(score.min()),
        "bottom10_eff": float(np.sort(score)[:bottom_n].mean()),
        "gini_eff": float(gini_coefficient(score)),
        "std_eff": float(score.std()),
        "total_unmet": total_unmet,
        "total_waste": total_waste,
        "total_served": total_served,
        "total_demand": total_demand,
        "serve_ratio": float(total_served / total_demand) if total_demand > 0 else 0.0,
        "score_per_site": score,
        "annual_eff": annual_eff,
        "pair_first": pair_first,
        "pair_second": pair_second,
    }
|
|||
|
|
|
|||
|
|
|
|||
|
|
def allocate_with_pairs(
    sites: pd.DataFrame,
    pairs_df: pd.DataFrame,
    k_min: float,
    *,
    n_target: int,
    capacity: float,
    max_iter: int = 30,
) -> Tuple[np.ndarray, np.ndarray, pd.DataFrame, int]:
    """Allocate visit slots so paired + single trips total *n_target*.

    Each paired trip covers two sites with one physical trip, so the slot
    budget is n_target plus the number of paired trips — which is not known
    up front.  Iterate: allocate slots, pair them greedily, and update the
    budget until it stops changing (fixed point), or until max_iter is
    exhausted (the last iterate is returned then).

    NOTE(review): *capacity* is accepted but unused here — kept for
    signature symmetry with the metric functions; confirm before removing.

    Returns (visits, singles, pairs_with_counts, n_total), or a tuple of
    four Nones when the base allocation is infeasible.
    """
    budget = int(n_target)

    for _ in range(max_iter):
        visits = allocate_visits(sites, k_min, budget)
        if visits is None:
            return None, None, None, None

        paired, singles = assign_pairs(pairs_df, visits)
        next_budget = int(n_target + int(paired["pair_count"].sum()))
        if next_budget == budget:
            return visits, singles, paired, budget
        budget = next_budget

    return visits, singles, paired, budget
|
|||
|
|
|
|||
|
|
|
|||
|
|
def select_kmin(results: pd.DataFrame) -> float:
    """Choose a k_min from the sweep results.

    Preference: the first row (in results order, i.e. the smallest swept k
    when rows are ordered by k_min) whose effectiveness Gini is below 0.2;
    failing that, the k_min with the highest mean effectiveness.
    """
    fair = results.loc[results["gini_eff"] < 0.2, "k_min"]
    if not fair.empty:
        return float(fair.iloc[0])

    best_row = results["effectiveness"].idxmax()
    return float(results.loc[best_row, "k_min"])
|
|||
|
|
|
|||
|
|
|
|||
|
|
def plot_results(results: pd.DataFrame, output_dir: str) -> float:
    """Render the 4x2 diagnostic grid of metrics vs k_min and save it.

    Saves p3_kmin_effectiveness.png under *output_dir* (created if absent)
    and returns the k_min chosen by select_kmin().  Raises RuntimeError when
    matplotlib is not installed.
    """
    if not _HAS_MPL:
        # Message (Chinese): "Missing dependency matplotlib (cannot plot);
        # install matplotlib before running the plotting section."
        raise RuntimeError("缺少依赖: matplotlib(无法绘图)。请先安装 matplotlib 再运行绘图部分。")

    fig, axes = plt.subplots(4, 2, figsize=(12, 13))

    # The selected k_min is highlighted on every panel; its effectiveness is
    # read from the results row closest to it.
    selected_k = select_kmin(results)
    selected_idx = (results["k_min"] - selected_k).abs().idxmin()
    selected_eff = float(results.loc[selected_idx, "effectiveness"])
    selected_label = f"Selected k_min={selected_k:.1f}"

    # Panel (0, 0): mean effectiveness across sites.
    ax = axes[0, 0]
    ax.plot(results["k_min"], results["effectiveness"], "b-", lw=2)
    ax.axvline(selected_k, color="r", ls="--", label=selected_label)
    ax.scatter([selected_k], [selected_eff], c="r", s=100, zorder=5)
    ax.set_xlabel("k_min")
    ax.set_ylabel("Mean Effectiveness")
    ax.set_title("Mean Effectiveness vs k_min")
    ax.legend()
    ax.grid(True, alpha=0.3)

    # Panel (0, 1): mean effectiveness of the worst decile of sites.
    ax = axes[0, 1]
    ax.plot(results["k_min"], results["bottom10_eff"], "m-", lw=2)
    ax.axvline(selected_k, color="r", ls="--")
    ax.set_xlabel("k_min")
    ax.set_ylabel("Bottom 10% Mean Effectiveness")
    ax.set_title("Bottom 10% Mean Effectiveness vs k_min")
    ax.grid(True, alpha=0.3)

    # Panel (1, 0): total served families (thousands) vs the demand ceiling.
    ax = axes[1, 0]
    ax.plot(results["k_min"], results["total_served"] / 1000, "c-", lw=2)
    ax.axhline(results["total_demand"].iloc[0] / 1000, color="gray", ls=":", label="Total Demand")
    ax.axvline(selected_k, color="r", ls="--")
    ax.set_xlabel("k_min")
    ax.set_ylabel("Served Families (×1000)")
    ax.set_title("Total Served vs k_min")
    ax.legend()
    ax.grid(True, alpha=0.3)

    # Panel (1, 1): effectiveness of the single worst site.
    ax = axes[1, 1]
    ax.plot(results["k_min"], results["min_eff"], "g-", lw=2)
    ax.axvline(selected_k, color="r", ls="--")
    ax.set_xlabel("k_min")
    ax.set_ylabel("Min Effectiveness")
    ax.set_title("Worst Site Effectiveness vs k_min")
    ax.grid(True, alpha=0.3)

    # Panel (2, 0): unmet demand vs wasted capacity (thousands).
    ax = axes[2, 0]
    ax.plot(results["k_min"], results["unmet"] / 1000, "r-", lw=2, label="Unmet")
    ax.plot(results["k_min"], results["waste"] / 1000, "b-", lw=2, label="Waste")
    ax.axvline(selected_k, color="gray", ls="--")
    ax.set_xlabel("k_min")
    ax.set_ylabel("Families (×1000)")
    ax.set_title("Unmet Demand vs Wasted Capacity")
    ax.legend()
    ax.grid(True, alpha=0.3)

    # Panel (2, 1): dispersion (std) of per-site effectiveness.
    ax = axes[2, 1]
    ax.plot(results["k_min"], results["std_eff"], color="tab:orange", lw=2)
    ax.axvline(selected_k, color="gray", ls="--")
    ax.set_xlabel("k_min")
    ax.set_ylabel("Std Effectiveness")
    ax.set_title("Effectiveness Std vs k_min")
    ax.grid(True, alpha=0.3)

    # Panel (3, 0): Gini of effectiveness; 0.2 is the selection threshold
    # used by select_kmin.
    ax = axes[3, 0]
    ax.plot(results["k_min"], results["gini_eff"], color="tab:purple", lw=2)
    ax.axhline(0.2, color="gray", ls=":", lw=1)
    ax.axvline(selected_k, color="r", ls="--")
    ax.set_xlabel("k_min")
    ax.set_ylabel("Gini Coefficient")
    ax.set_title("Gini (Effectiveness) vs k_min")
    ax.grid(True, alpha=0.3)

    # Panel (3, 1) is intentionally blank.
    axes[3, 1].axis("off")

    plt.tight_layout()
    os.makedirs(output_dir, exist_ok=True)
    plt.savefig(os.path.join(output_dir, "p3_kmin_effectiveness.png"), dpi=150)
    plt.close(fig)

    return selected_k
|
|||
|
|
|
|||
|
|
|
|||
|
|
def main() -> None:
    """CLI entry point: sweep k_min, score each allocation, export CSV/PNG."""
    parser = argparse.ArgumentParser(description="Task 3 k-min allocation with two-stop pairing.")
    parser.add_argument("--input-xlsx", default=INPUT_XLSX)
    parser.add_argument("--input-pairs", default=INPUT_PAIRS)
    parser.add_argument("--output-dir", default=OUTPUT_DIR)
    parser.add_argument("--kmin-start", type=float, default=1.0)
    parser.add_argument("--kmin-end", type=float, default=10.0)
    parser.add_argument("--kmin-step", type=float, default=0.1)
    parser.add_argument("--capacity", type=float, default=C_OPT)
    parser.add_argument("--n-target", type=int, default=N_TARGET)
    parser.add_argument("--alpha", type=float, default=ALPHA)
    parser.add_argument("--beta", type=float, default=BETA)
    args = parser.parse_args()

    # Load inputs and validate the pairs CSV schema up front.
    sites = load_sites(args.input_xlsx)
    pairs = pd.read_csv(args.input_pairs)
    required_cols = {
        "site_i_idx",
        "site_j_idx",
        "score_mean",
        "q_opt",
        "served_i_mean",
        "served_j_mean",
        "distance_miles",
    }
    missing = required_cols.difference(pairs.columns)
    if missing:
        raise ValueError(f"Missing columns in pairs CSV: {sorted(missing)}")

    # Greedy pairing consumes rows in order: best score first, ties broken
    # by shortest distance.
    pairs = pairs.sort_values(
        ["score_mean", "distance_miles"], ascending=[False, True]
    ).reset_index(drop=True)

    # Sweep k_min over [start, end]; the +1e-9 keeps the endpoint despite
    # floating-point step accumulation.
    k_range = np.arange(args.kmin_start, args.kmin_end + 1e-9, args.kmin_step)
    results = []

    for k_min in k_range:
        visits, singles, pairs_with_counts, n_total_guess = allocate_with_pairs(
            sites,
            pairs,
            float(k_min),
            n_target=args.n_target,
            capacity=args.capacity,
        )
        if visits is None:
            # Infeasible for this k_min (minimums exceed the trip budget).
            continue

        metrics = _compute_metrics(
            sites,
            visits,
            pairs_with_counts,
            singles,
            alpha=args.alpha,
            beta=args.beta,
            capacity=args.capacity,
        )

        # Each paired trip serves two visit slots with one physical trip, so
        # total physical trips = allocated slots - paired trips.
        pair_total = int(pairs_with_counts["pair_count"].sum())
        total_trips = int(visits.sum() - pair_total)
        row = {
            "k_min": float(k_min),
            "effectiveness": metrics["effectiveness"],
            "min_eff": metrics["min_eff"],
            "bottom10_eff": metrics["bottom10_eff"],
            "gini_eff": metrics["gini_eff"],
            "std_eff": metrics["std_eff"],
            "unmet": metrics["total_unmet"],
            "waste": metrics["total_waste"],
            "total_served": metrics["total_served"],
            "total_demand": metrics["total_demand"],
            "serve_ratio": metrics["serve_ratio"],
            "total_visits_allocated": int(visits.sum()),
            "pair_trips": pair_total,
            "total_trips": total_trips,
            "n_total_guess": int(n_total_guess),
        }
        results.append(row)

    results_df = pd.DataFrame(results)
    if len(results_df) == 0:
        raise RuntimeError("No feasible k_min values found.")

    # Re-run the allocation at the selected k_min for the detailed exports.
    best_k = select_kmin(results_df)
    # NOTE(review): best_idx is computed but never used below — confirm.
    best_idx = (results_df["k_min"] - best_k).abs().idxmin()

    visits, singles, pairs_with_counts, n_total_guess = allocate_with_pairs(
        sites,
        pairs,
        float(best_k),
        n_target=args.n_target,
        capacity=args.capacity,
    )
    metrics = _compute_metrics(
        sites,
        visits,
        pairs_with_counts,
        singles,
        alpha=args.alpha,
        beta=args.beta,
        capacity=args.capacity,
    )

    pair_total = int(pairs_with_counts["pair_count"].sum())
    total_trips = int(visits.sum() - pair_total)

    # Per-site breakdown of how each site's visits are served.
    site_rows = pd.DataFrame(
        {
            "site_idx": sites["site_idx"],
            "site_name": sites["site_name"],
            "total_visits_allocated": visits,
            "single_visits": singles,
            "paired_first": metrics["pair_first"].astype(int),
            "paired_second": metrics["pair_second"].astype(int),
            "paired_total": (metrics["pair_first"] + metrics["pair_second"]).astype(int),
        }
    )

    # Only the pairs actually used, most heavily used first.
    pairs_out = pairs_with_counts.loc[pairs_with_counts["pair_count"] > 0].copy()
    pairs_out = pairs_out.sort_values(["pair_count", "score_mean"], ascending=[False, False])

    os.makedirs(args.output_dir, exist_ok=True)
    results_df.to_csv(os.path.join(args.output_dir, "p3_kmin_data.csv"), index=False)
    site_rows.to_csv(os.path.join(args.output_dir, "p3_kmin_sites.csv"), index=False)
    pairs_out.to_csv(os.path.join(args.output_dir, "p3_kmin_pairs.csv"), index=False)

    if _HAS_MPL:
        plot_results(results_df, args.output_dir)
    else:
        # Message (Chinese): "matplotlib not detected; skipping plots (CSV
        # results are still saved)."
        print("未检测到 matplotlib,跳过绘图(仍会保存CSV结果)。")

    print(f"Best k_min={best_k:.1f} (total_trips={total_trips}, pair_trips={pair_total})")
    # NOTE(review): this summary hardcodes "data/..." even when --output-dir
    # differs, and mentions the PNG even when plotting was skipped — confirm.
    print(
        "Saved: data/p3_kmin_data.csv, data/p3_kmin_sites.csv, "
        "data/p3_kmin_pairs.csv, data/p3_kmin_effectiveness.png"
    )
|
|||
|
|
|
|||
|
|
|
|||
|
|
# Run the CLI when executed as a script.
if __name__ == "__main__":
    main()
|