Files
mcm-mfp/task3/02_pairing.py
2026-01-19 11:57:19 +08:00

210 lines
7.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Task 3 - Step 2: 共生站点配对筛选与选择
=======================================
输入: 01_distance.xlsx (距离矩阵 + 站点信息)
输出: 02_pairing.xlsx (筛选后的配对列表 + 最终选择)
配对筛选条件:
1. 距离约束: l_ij ≤ l_max (50英里)
2. 容量约束: μ_i + μ_j ≤ 450
3. 稳定性约束: CV_i ≤ 0.5 且 CV_j ≤ 0.5
配对价值函数:
V_ij = α * (μ_i + μ_j) / Q - β * l_ij / l_max - γ * (σ_i² + σ_j²) / (μ_i + μ_j)²
配对选择: 贪心算法,每个站点最多配对一次
"""
import pandas as pd
import numpy as np
# ============================================
# 参数设置
# ============================================
INPUT_FILE = '01_distance.xlsx'
OUTPUT_FILE = '02_pairing.xlsx'
# 约束参数
L_MAX = 50 # 距离阈值 (英里)
MU_SUM_MAX = 450 # 需求和上限
CV_MAX = 0.5 # 变异系数上限
Q = 400 # 卡车容量
# 价值函数权重
ALPHA = 1.0 # 容量利用率权重
BETA = 0.3 # 距离惩罚权重
GAMMA = 0.5 # 风险惩罚权重
# ============================================
# 读取数据
# ============================================
print("=" * 60)
print("Task 3 - Step 2: 共生站点配对筛选与选择")
print("=" * 60)
# 读取站点信息
sites = pd.read_excel(INPUT_FILE, sheet_name='sites')
print(f"\n读取站点数据: {len(sites)} 个站点")
# 读取距离矩阵
dist_matrix = pd.read_excel(INPUT_FILE, sheet_name='distance_matrix', index_col=0)
print(f"读取距离矩阵: {dist_matrix.shape}")
# 计算变异系数
sites['cv'] = sites['sigma'] / sites['mu']
print(f"\nCV范围: [{sites['cv'].min():.3f}, {sites['cv'].max():.3f}]")
print(f"CV > {CV_MAX} 的站点数: {(sites['cv'] > CV_MAX).sum()}")
# ============================================
# 配对筛选
# ============================================
print(f"\n" + "-" * 40)
print("配对筛选")
print("-" * 40)
candidates = []
n = len(sites)
for i in range(n):
for j in range(i + 1, n): # 只考虑上三角
site_i = sites.iloc[i]
site_j = sites.iloc[j]
# 获取距离
dist = dist_matrix.iloc[i, j]
# 约束检查
# 1. 距离约束
if dist > L_MAX:
continue
# 2. 容量约束
mu_sum = site_i['mu'] + site_j['mu']
if mu_sum > MU_SUM_MAX:
continue
# 3. 稳定性约束
if site_i['cv'] > CV_MAX or site_j['cv'] > CV_MAX:
continue
# 通过所有约束,计算配对价值
sigma_sq_sum = site_i['sigma']**2 + site_j['sigma']**2
# 价值函数
value = (ALPHA * mu_sum / Q
- BETA * dist / L_MAX
- GAMMA * sigma_sq_sum / mu_sum**2)
candidates.append({
'site_i_id': site_i['site_id'],
'site_j_id': site_j['site_id'],
'site_i_name': site_i['site_name'],
'site_j_name': site_j['site_name'],
'distance': dist,
'mu_i': site_i['mu'],
'mu_j': site_j['mu'],
'mu_sum': mu_sum,
'sigma_i': site_i['sigma'],
'sigma_j': site_j['sigma'],
'cv_i': site_i['cv'],
'cv_j': site_j['cv'],
'k_i': site_i['k'],
'k_j': site_j['k'],
'value': value
})
df_candidates = pd.DataFrame(candidates)
print(f"通过约束的候选配对数: {len(df_candidates)}")
if len(df_candidates) > 0:
print(f"配对价值范围: [{df_candidates['value'].min():.3f}, {df_candidates['value'].max():.3f}]")
print(f"平均距离: {df_candidates['distance'].mean():.2f} 英里")
print(f"平均需求和: {df_candidates['mu_sum'].mean():.1f}")
# ============================================
# 贪心配对选择
# ============================================
print(f"\n" + "-" * 40)
print("贪心配对选择")
print("-" * 40)
# 按价值降序排序
df_candidates_sorted = df_candidates.sort_values('value', ascending=False).reset_index(drop=True)
# 贪心选择
selected_pairs = []
used_sites = set()
for _, row in df_candidates_sorted.iterrows():
site_i = row['site_i_id']
site_j = row['site_j_id']
# 检查是否已被使用
if site_i not in used_sites and site_j not in used_sites:
selected_pairs.append(row.to_dict())
used_sites.add(site_i)
used_sites.add(site_j)
df_selected = pd.DataFrame(selected_pairs)
print(f"最终选择配对数: {len(df_selected)}")
print(f"涉及站点数: {len(used_sites)} (占总站点 {len(used_sites)/n*100:.1f}%)")
print(f"未配对站点数: {n - len(used_sites)}")
if len(df_selected) > 0:
print(f"\n选中配对的价值范围: [{df_selected['value'].min():.3f}, {df_selected['value'].max():.3f}]")
print(f"选中配对的距离范围: [{df_selected['distance'].min():.2f}, {df_selected['distance'].max():.2f}] 英里")
print(f"选中配对的需求和范围: [{df_selected['mu_sum'].min():.1f}, {df_selected['mu_sum'].max():.1f}]")
# ============================================
# 显示选中的配对
# ============================================
print(f"\n" + "-" * 40)
print("选中的配对列表")
print("-" * 40)
if len(df_selected) > 0:
display_cols = ['site_i_name', 'site_j_name', 'distance', 'mu_sum', 'k_i', 'k_j', 'value']
print(df_selected[display_cols].to_string(index=False))
# ============================================
# 保存结果
# ============================================
with pd.ExcelWriter(OUTPUT_FILE, engine='openpyxl') as writer:
# Sheet 1: 站点信息含CV
sites.to_excel(writer, sheet_name='sites', index=False)
# Sheet 2: 所有候选配对
df_candidates_sorted.to_excel(writer, sheet_name='all_candidates', index=False)
# Sheet 3: 最终选择的配对
df_selected.to_excel(writer, sheet_name='selected_pairs', index=False)
# Sheet 4: 参数记录
params = pd.DataFrame({
'parameter': ['L_MAX', 'MU_SUM_MAX', 'CV_MAX', 'Q', 'ALPHA', 'BETA', 'GAMMA'],
'value': [L_MAX, MU_SUM_MAX, CV_MAX, Q, ALPHA, BETA, GAMMA],
'description': ['距离阈值(英里)', '需求和上限', 'CV上限', '卡车容量',
'容量利用率权重', '距离惩罚权重', '风险惩罚权重']
})
params.to_excel(writer, sheet_name='parameters', index=False)
# Sheet 5: 汇总统计
summary = pd.DataFrame({
'metric': ['total_sites', 'candidate_pairs', 'selected_pairs',
'paired_sites', 'unpaired_sites', 'avg_distance', 'avg_mu_sum'],
'value': [n, len(df_candidates), len(df_selected),
len(used_sites), n - len(used_sites),
df_selected['distance'].mean() if len(df_selected) > 0 else 0,
df_selected['mu_sum'].mean() if len(df_selected) > 0 else 0]
})
summary.to_excel(writer, sheet_name='summary', index=False)
print(f"\n结果已保存至: {OUTPUT_FILE}")
print(" - Sheet 'sites': 站点信息含CV")
print(" - Sheet 'all_candidates': 所有候选配对")
print(" - Sheet 'selected_pairs': 最终选择的配对")
print(" - Sheet 'parameters': 参数记录")
print(" - Sheet 'summary': 汇总统计")
print("\n" + "=" * 60)