Files
mcm-mfp/task1/export_paper_tables.py

122 lines
4.5 KiB
Python
Raw Permalink Normal View History

2026-01-20 03:45:15 +08:00
from __future__ import annotations
import math
from pathlib import Path
import pandas as pd
ROOT = Path(__file__).resolve().parent
OUT_DIR = ROOT / "paper_tables"
def _to_int_series(s: pd.Series) -> pd.Series:
return s.map(lambda x: int(round(float(x))) if pd.notna(x) else x)
def _to_float_series(s: pd.Series, ndigits: int) -> pd.Series:
return s.map(lambda x: round(float(x), ndigits) if pd.notna(x) else x)
def _format_table1_metrics_summary(df: pd.DataFrame) -> pd.DataFrame:
out = df.copy()
out["E1_total_service"] = _to_int_series(out["E1_total_service"])
out["E2_quality_weighted"] = _to_int_series(out["E2_quality_weighted"])
out["F1_gini"] = _to_float_series(out["F1_gini"], 4)
out["F2_min_satisfaction"] = _to_float_series(out["F2_min_satisfaction"], 2)
out["F3_cv_satisfaction"] = _to_float_series(out["F3_cv_satisfaction"], 4)
out["E1_total_service_pct"] = _to_float_series(out["E1_total_service_pct"], 2)
out["E2_quality_weighted_pct"] = _to_float_series(out["E2_quality_weighted_pct"], 2)
return out
def _format_table2_corrected_sites(df: pd.DataFrame) -> pd.DataFrame:
out = df.copy()
out["site_id"] = _to_int_series(out["site_id"])
out["mu"] = _to_float_series(out["mu"], 1)
out["sigma"] = _to_float_series(out["sigma"], 1)
out["p_trunc"] = _to_float_series(out["p_trunc"], 3)
out["mu_tilde"] = _to_float_series(out["mu_tilde"], 1)
out["mu_delta"] = _to_float_series(out["mu_delta"], 1)
out["mu_delta_pct"] = _to_float_series(out["mu_delta_pct"], 1)
return out
def _format_table3_backtest_summary(df: pd.DataFrame) -> pd.DataFrame:
out = df.copy()
out["value"] = out["value"].map(lambda x: round(float(x), 6) if pd.notna(x) else x)
out["p_value"] = out["p_value"].map(lambda x: round(float(x), 6) if pd.notna(x) else x)
return out
def _format_appendix_site_details(df: pd.DataFrame) -> pd.DataFrame:
out = df.copy()
out["site_id"] = _to_int_series(out["site_id"])
out["mu"] = _to_float_series(out["mu"], 1)
out["mu_tilde"] = _to_float_series(out["mu_tilde"], 1)
out["k"] = _to_int_series(out["k"])
out["annual_service"] = _to_int_series(out["annual_service"])
out["r"] = _to_float_series(out["r"], 4)
out["quality_factor"] = _to_float_series(out["quality_factor"], 4)
out["service_quality_weighted"] = _to_int_series(out["service_quality_weighted"])
return out
def export_tables() -> list[Path]:
OUT_DIR.mkdir(parents=True, exist_ok=True)
written: list[Path] = []
# Table 1: overall performance comparison
metrics_xlsx = ROOT / "04_metrics.xlsx"
metrics = pd.read_excel(metrics_xlsx, sheet_name="metrics_summary")
metrics_paper = _format_table1_metrics_summary(metrics)
p1 = OUT_DIR / "table1_metrics_summary.csv"
metrics_paper.to_csv(p1, index=False, encoding="utf-8-sig")
written.append(p1)
# Table 2: truncation correction (corrected sites)
backtest_xlsx = ROOT / "07_backtest.xlsx"
corrected = pd.read_excel(backtest_xlsx, sheet_name="corrected_sites")
corrected = corrected.copy()
corrected["mu_delta"] = corrected["mu_tilde"] - corrected["mu"]
corrected["mu_delta_pct"] = corrected["mu_delta"] / corrected["mu"] * 100
corrected_paper = _format_table2_corrected_sites(corrected)
p2 = OUT_DIR / "table2_corrected_sites.csv"
corrected_paper.to_csv(p2, index=False, encoding="utf-8-sig")
written.append(p2)
# Table 3: backtest & fit statistics
summary = pd.read_excel(backtest_xlsx, sheet_name="summary_metrics")
summary_paper = _format_table3_backtest_summary(summary)
p3 = OUT_DIR / "table3_backtest_summary.csv"
summary_paper.to_csv(p3, index=False, encoding="utf-8-sig")
written.append(p3)
# Table 4: constraint validation results
validate_xlsx = ROOT / "06_validate.xlsx"
validation = pd.read_excel(validate_xlsx, sheet_name="validation_results")
p4 = OUT_DIR / "table4_validation_results.csv"
validation.to_csv(p4, index=False, encoding="utf-8-sig")
written.append(p4)
# Appendix: per-site allocation details (optional for paper appendix)
site_details = pd.read_excel(metrics_xlsx, sheet_name="site_details")
site_details_paper = _format_appendix_site_details(site_details)
p5 = OUT_DIR / "appendix_site_details.csv"
site_details_paper.to_csv(p5, index=False, encoding="utf-8-sig")
written.append(p5)
return written
def main() -> None:
written = export_tables()
rel = [p.relative_to(ROOT) for p in written]
print("Wrote:")
for p in rel:
print(f" - {p}")
if __name__ == "__main__":
main()