task3/generate_readme_tables.py


import pandas as pd
import numpy as np

def format_table_1_pairings():
    try:
        df = pd.read_excel('02_pairing.xlsx', sheet_name='selected_pairs')
        df = df.sort_values('value', ascending=False).head(10)
        
        print("\n### Top 10 High Value Pairings")
        print("| 排名 | 站点i (Site i) | 站点j (Site j) | 距离 (mi) | 需求和 (μ_sum) | 价值 (V_ij) |")
        print("|---|---|---|---|---|---|")
        
        for idx, row in df.iterrows():
            rank = idx + 1 # Assuming index resets or we just use counter. Actually index might not be 0-based if sorted.
            # But wait, df is from read_excel, default index 0..N.
            # If I sorted it, index is shuffled.
            # I should use a counter.
            pass
            
        for i, (idx, row) in enumerate(df.iterrows()):
            rank = i + 1
            site_i = row['site_i_name'].replace('MFP ', '')
            site_j = row['site_j_name'].replace('MFP ', '')
            dist = f"{row['distance']:.1f} mi"
            mu_sum = f"{row['mu_sum']:.0f}"
            val = f"{row['value']:.2f}"
            print(f"| {rank} | {site_i} | {site_j} | {dist} | {mu_sum} | {val} |")
    except Exception as e:
        print(f"Error generating Table 1: {e}")

def format_table_2_allocation():
    try:
        df = pd.read_excel('03_allocation.xlsx', sheet_name='allocation')
        df = df.sort_values('value', ascending=False).head(5)
        
        print("\n### Top 5 Pairings' Optimal Allocation Strategies")
        print("| 配对 | μ_i | μ_j | σ_i | σ_j | q* | 比例 |")
        print("|---|---|---|---|---|---|---|")
        
        for _, row in df.iterrows():
            name_i = row['site_i_name'].replace('MFP ', '').split(' - ')[0]
            name_j = row['site_j_name'].replace('MFP ', '').split(' - ')[0]
            # If still too long, take first 15 chars
            if len(name_i) > 15: name_i = name_i[:12] + "..."
            if len(name_j) > 15: name_j = name_j[:12] + "..."
            
            pair = f"{name_i} + {name_j}"
            mu_i = f"{row['mu_i']:.1f}"
            mu_j = f"{row['mu_j']:.1f}"
            sigma_i = f"{row['sigma_i']:.1f}"
            sigma_j = f"{row['sigma_j']:.1f}"
            q_star = f"{row['q_final']:.1f}"
            ratio = f"{row['q_ratio']:.1%}"
            print(f"| {pair} | {mu_i} | {mu_j} | {sigma_i} | {sigma_j} | {q_star} | {ratio} |")
    except Exception as e:
        print(f"Error generating Table 2: {e}")

def format_table_3_comparison():
    try:
        df = pd.read_excel('06_evaluate.xlsx', sheet_name='comparison')
        # The structure is Metric, Task 1, Task 3, Change, Change %
        
        print("\n### Actual Result Comparison Analysis")
        print("| 指标 | Task 1 | Task 3 | 变化 | 变化% |")
        print("|---|---|---|---|---|")
        
        for _, row in df.iterrows():
            metric = row['Metric']
            # Bold important metrics
            if 'E1' in metric or 'E2' in metric or 'RS' in metric or 'R1' in metric:
                metric = f"**{metric}**"
                
            task1 = row['Task 1']
            task3 = row['Task 3']
            change = row['Change']
            change_pct = row['Change %']
            
            # Format numbers
            if 'Gini' in row['Metric'] or '最低' in row['Metric'] or '风险' in row['Metric']:
                t1_str = f"{task1:.3f}" if isinstance(task1, (int, float)) else task1
                t3_str = f"{task3:.3f}" if isinstance(task3, (int, float)) else task3
                ch_str = f"{change:+.3f}" if isinstance(change, (int, float)) else change
            else:
                t1_str = f"{task1:,.0f}" if isinstance(task1, (int, float)) else task1
                t3_str = f"{task3:,.0f}" if isinstance(task3, (int, float)) else task3
                ch_str = f"{change:+,.0f}" if isinstance(change, (int, float)) else change
                
            if '风险' in row['Metric'] and task1 == 0:
                 pct_str = "新增"
            elif '节省' in row['Metric'] and task1 == 0:
                 pct_str = "新增"
            else:
                 pct_str = f"{change_pct:+.1f}%" if isinstance(change_pct, (int, float)) else change_pct
                 
            # Bold significant improvements
            if isinstance(change_pct, (int, float)) and change_pct > 5:
                pct_str = f"**{pct_str}**"
            
            print(f"| {metric} | {t1_str} | {t3_str} | {ch_str} | {pct_str} |")
            
        # Add R1 and RS from task3_metrics sheet
        try:
            metrics_df = pd.read_excel('06_evaluate.xlsx', sheet_name='task3_metrics')
            r1_val = metrics_df.loc[metrics_df['metric']=='R1', 'value'].values[0]
            rs_val = metrics_df.loc[metrics_df['metric']=='RS', 'value'].values[0] # Note: stored as RS/100 (e.g. 0.095)
            visits_val = metrics_df.loc[metrics_df['metric']=='total_dual_visits', 'value'].values[0]
            
            print(f"\nTotal Dual Visits: {visits_val}")

            # Format R1
            # Task 1 R1 is 0
            print(f"| **R1 (缺口风险)** | 0 | {r1_val:.3f} | +{r1_val:.3f} | 新增 |")
            
            # Format RS
            # Task 1 RS is 0%
            rs_pct = rs_val * 100
            print(f"| **RS (资源节省)** | 0% | {rs_pct:.1f}% | +{rs_pct:.1f}% | 新增 |")
            
        except Exception as e2:
            print(f"Error adding R1/RS: {e2}")
            
    except Exception as e:
        print(f"Error generating Table 3: {e}")

def format_table_4_sensitivity():
    try:
        df = pd.read_excel('07_sensitivity.xlsx', sheet_name='summary')
        
        print("\n### Sensitivity Analysis Results")
        print("| 参数 | E1范围 | E2范围 | R1范围 |")
        print("|---|---|---|---|")
        
        for _, row in df.iterrows():
            param = row['Parameter']
            e1_range = row['E1_Range']
            e2_range = row['E2_Range']
            r1_range = row['R1_Range']
            
            # Add variable percentage if available
            e1_var = row['E1_Var%']
            e2_var = row['E2_Var%']
            
            e1_str = f"{e1_range} ({e1_var:+.2f}%)" if pd.notnull(e1_var) else e1_range
            e2_str = f"{e2_range} ({e2_var:+.2f}%)" if pd.notnull(e2_var) else e2_range
            
            # Map param names to symbols
            if param == 'merge_ratio': param = '$r_{merge}$'
            elif param == 'l_max': param = '$l_{max}$'
            elif param == 'mu_sum_max': param = '$\mu_{sum,max}$'
            elif param == 'cv_max': param = '$CV_{max}$'
            
            print(f"| {param} | {e1_str} | {e2_str} | {r1_range} |")
            
    except Exception as e:
        print(f"Error generating Table 4: {e}")

if __name__ == "__main__":
    format_table_1_pairings()
    format_table_2_allocation()
    format_table_3_comparison()
    format_table_4_sensitivity()