Files
mcm-mfp/task3/01_distance.py
2026-01-19 11:57:19 +08:00

120 lines
3.8 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Task 3 - Step 1: 距离矩阵计算
=============================
输入: task1/03_allocate.xlsx (70个站点的坐标信息)
输出: task3/01_distance.xlsx (70×70距离矩阵 + 站点信息)
距离计算公式 (Haversine简化):
l_ij = 69.0 * sqrt((lat_i - lat_j)^2 + cos^2(lat_avg * pi/180) * (lon_i - lon_j)^2)
单位: 英里
"""
import pandas as pd
import numpy as np
# ============================================
# 参数设置
# ============================================
INPUT_FILE = '../task1/03_allocate.xlsx'
OUTPUT_FILE = '01_distance.xlsx'
# ============================================
# 读取数据
# ============================================
print("=" * 60)
print("Task 3 - Step 1: 距离矩阵计算")
print("=" * 60)
df = pd.read_excel(INPUT_FILE)
print(f"\n读取站点数据: {len(df)} 个站点")
print(f"列名: {df.columns.tolist()}")
# 提取关键列
sites = df[['site_id', 'site_name', 'lat', 'lon', 'mu', 'sigma', 'mu_tilde', 'k']].copy()
print(f"\n站点数据概览:")
print(sites.head())
# ============================================
# 距离计算函数
# ============================================
def calc_distance(lat1, lon1, lat2, lon2):
"""
计算两点间的近似距离(英里)
使用Haversine公式的简化版本适用于小范围地域
"""
lat_avg = (lat1 + lat2) / 2
lat_avg_rad = np.radians(lat_avg)
delta_lat = lat1 - lat2
delta_lon = lon1 - lon2
# 69.0 miles per degree of latitude
# cos(lat) correction for longitude
distance = 69.0 * np.sqrt(delta_lat**2 + (np.cos(lat_avg_rad) * delta_lon)**2)
return distance
# ============================================
# 构建距离矩阵
# ============================================
n = len(sites)
distance_matrix = np.zeros((n, n))
for i in range(n):
for j in range(n):
if i != j:
distance_matrix[i, j] = calc_distance(
sites.iloc[i]['lat'], sites.iloc[i]['lon'],
sites.iloc[j]['lat'], sites.iloc[j]['lon']
)
# 转换为DataFrame
site_ids = sites['site_id'].values
df_distance = pd.DataFrame(distance_matrix, index=site_ids, columns=site_ids)
# ============================================
# 统计信息
# ============================================
# 提取上三角(排除对角线)
upper_tri = distance_matrix[np.triu_indices(n, k=1)]
print(f"\n距离矩阵统计:")
print(f" - 站点对总数: {len(upper_tri)}")
print(f" - 最小距离: {upper_tri.min():.2f} 英里")
print(f" - 最大距离: {upper_tri.max():.2f} 英里")
print(f" - 平均距离: {upper_tri.mean():.2f} 英里")
print(f" - 中位数距离: {np.median(upper_tri):.2f} 英里")
# 按阈值统计
thresholds = [30, 40, 50, 60, 70]
print(f"\n距离阈值统计:")
for th in thresholds:
count = np.sum(upper_tri <= th)
print(f" - ≤ {th} 英里: {count} 对 ({count/len(upper_tri)*100:.1f}%)")
# ============================================
# 保存结果
# ============================================
with pd.ExcelWriter(OUTPUT_FILE, engine='openpyxl') as writer:
# Sheet 1: 站点信息
sites.to_excel(writer, sheet_name='sites', index=False)
# Sheet 2: 距离矩阵
df_distance.to_excel(writer, sheet_name='distance_matrix')
# Sheet 3: 距离统计
stats = pd.DataFrame({
'metric': ['min', 'max', 'mean', 'median', 'std', 'total_pairs'],
'value': [upper_tri.min(), upper_tri.max(), upper_tri.mean(),
np.median(upper_tri), upper_tri.std(), len(upper_tri)]
})
stats.to_excel(writer, sheet_name='statistics', index=False)
print(f"\n结果已保存至: {OUTPUT_FILE}")
print(" - Sheet 'sites': 站点信息")
print(" - Sheet 'distance_matrix': 70×70距离矩阵")
print(" - Sheet 'statistics': 距离统计")
print("\n" + "=" * 60)