Files
mcm-mfp/task3/01_distance.py

120 lines
3.8 KiB
Python
Raw Permalink Normal View History

2026-01-19 11:57:19 +08:00
"""
Task 3 - Step 1: 距离矩阵计算
=============================
输入: task1/03_allocate.xlsx (70个站点的坐标信息)
输出: task3/01_distance.xlsx (70×70距离矩阵 + 站点信息)
距离计算公式 (Haversine简化):
l_ij = 69.0 * sqrt((lat_i - lat_j)^2 + cos^2(lat_avg * pi/180) * (lon_i - lon_j)^2)
单位: 英里
"""
import pandas as pd
import numpy as np
# ============================================
# 参数设置
# ============================================
INPUT_FILE = '../task1/03_allocate.xlsx'
OUTPUT_FILE = '01_distance.xlsx'
# ============================================
# 读取数据
# ============================================
print("=" * 60)
print("Task 3 - Step 1: 距离矩阵计算")
print("=" * 60)
df = pd.read_excel(INPUT_FILE)
print(f"\n读取站点数据: {len(df)} 个站点")
print(f"列名: {df.columns.tolist()}")
# 提取关键列
sites = df[['site_id', 'site_name', 'lat', 'lon', 'mu', 'sigma', 'mu_tilde', 'k']].copy()
print(f"\n站点数据概览:")
print(sites.head())
# ============================================
# 距离计算函数
# ============================================
def calc_distance(lat1, lon1, lat2, lon2):
"""
计算两点间的近似距离英里
使用Haversine公式的简化版本适用于小范围地域
"""
lat_avg = (lat1 + lat2) / 2
lat_avg_rad = np.radians(lat_avg)
delta_lat = lat1 - lat2
delta_lon = lon1 - lon2
# 69.0 miles per degree of latitude
# cos(lat) correction for longitude
distance = 69.0 * np.sqrt(delta_lat**2 + (np.cos(lat_avg_rad) * delta_lon)**2)
return distance
# ============================================
# 构建距离矩阵
# ============================================
n = len(sites)
distance_matrix = np.zeros((n, n))
for i in range(n):
for j in range(n):
if i != j:
distance_matrix[i, j] = calc_distance(
sites.iloc[i]['lat'], sites.iloc[i]['lon'],
sites.iloc[j]['lat'], sites.iloc[j]['lon']
)
# 转换为DataFrame
site_ids = sites['site_id'].values
df_distance = pd.DataFrame(distance_matrix, index=site_ids, columns=site_ids)
# ============================================
# 统计信息
# ============================================
# 提取上三角(排除对角线)
upper_tri = distance_matrix[np.triu_indices(n, k=1)]
print(f"\n距离矩阵统计:")
print(f" - 站点对总数: {len(upper_tri)}")
print(f" - 最小距离: {upper_tri.min():.2f} 英里")
print(f" - 最大距离: {upper_tri.max():.2f} 英里")
print(f" - 平均距离: {upper_tri.mean():.2f} 英里")
print(f" - 中位数距离: {np.median(upper_tri):.2f} 英里")
# 按阈值统计
thresholds = [30, 40, 50, 60, 70]
print(f"\n距离阈值统计:")
for th in thresholds:
count = np.sum(upper_tri <= th)
print(f" - ≤ {th} 英里: {count} 对 ({count/len(upper_tri)*100:.1f}%)")
# ============================================
# 保存结果
# ============================================
with pd.ExcelWriter(OUTPUT_FILE, engine='openpyxl') as writer:
# Sheet 1: 站点信息
sites.to_excel(writer, sheet_name='sites', index=False)
# Sheet 2: 距离矩阵
df_distance.to_excel(writer, sheet_name='distance_matrix')
# Sheet 3: 距离统计
stats = pd.DataFrame({
'metric': ['min', 'max', 'mean', 'median', 'std', 'total_pairs'],
'value': [upper_tri.min(), upper_tri.max(), upper_tri.mean(),
np.median(upper_tri), upper_tri.std(), len(upper_tri)]
})
stats.to_excel(writer, sheet_name='statistics', index=False)
print(f"\n结果已保存至: {OUTPUT_FILE}")
print(" - Sheet 'sites': 站点信息")
print(" - Sheet 'distance_matrix': 70×70距离矩阵")
print(" - Sheet 'statistics': 距离统计")
print("\n" + "=" * 60)