120 lines
3.8 KiB
Python
120 lines
3.8 KiB
Python
|
|
"""
|
|||
|
|
Task 3 - Step 1: 距离矩阵计算
|
|||
|
|
=============================
|
|||
|
|
|
|||
|
|
输入: task1/03_allocate.xlsx (70个站点的坐标信息)
|
|||
|
|
输出: task3/01_distance.xlsx (70×70距离矩阵 + 站点信息)
|
|||
|
|
|
|||
|
|
距离计算公式 (Haversine简化):
|
|||
|
|
l_ij = 69.0 * sqrt((lat_i - lat_j)^2 + cos^2(lat_avg * pi/180) * (lon_i - lon_j)^2)
|
|||
|
|
|
|||
|
|
单位: 英里
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
import pandas as pd
|
|||
|
|
import numpy as np
|
|||
|
|
|
|||
|
|
# ============================================
|
|||
|
|
# 参数设置
|
|||
|
|
# ============================================
|
|||
|
|
INPUT_FILE = '../task1/03_allocate.xlsx'
|
|||
|
|
OUTPUT_FILE = '01_distance.xlsx'
|
|||
|
|
|
|||
|
|
# ============================================
|
|||
|
|
# 读取数据
|
|||
|
|
# ============================================
|
|||
|
|
print("=" * 60)
|
|||
|
|
print("Task 3 - Step 1: 距离矩阵计算")
|
|||
|
|
print("=" * 60)
|
|||
|
|
|
|||
|
|
df = pd.read_excel(INPUT_FILE)
|
|||
|
|
print(f"\n读取站点数据: {len(df)} 个站点")
|
|||
|
|
print(f"列名: {df.columns.tolist()}")
|
|||
|
|
|
|||
|
|
# 提取关键列
|
|||
|
|
sites = df[['site_id', 'site_name', 'lat', 'lon', 'mu', 'sigma', 'mu_tilde', 'k']].copy()
|
|||
|
|
print(f"\n站点数据概览:")
|
|||
|
|
print(sites.head())
|
|||
|
|
|
|||
|
|
# ============================================
|
|||
|
|
# 距离计算函数
|
|||
|
|
# ============================================
|
|||
|
|
def calc_distance(lat1, lon1, lat2, lon2):
|
|||
|
|
"""
|
|||
|
|
计算两点间的近似距离(英里)
|
|||
|
|
使用Haversine公式的简化版本,适用于小范围地域
|
|||
|
|
"""
|
|||
|
|
lat_avg = (lat1 + lat2) / 2
|
|||
|
|
lat_avg_rad = np.radians(lat_avg)
|
|||
|
|
|
|||
|
|
delta_lat = lat1 - lat2
|
|||
|
|
delta_lon = lon1 - lon2
|
|||
|
|
|
|||
|
|
# 69.0 miles per degree of latitude
|
|||
|
|
# cos(lat) correction for longitude
|
|||
|
|
distance = 69.0 * np.sqrt(delta_lat**2 + (np.cos(lat_avg_rad) * delta_lon)**2)
|
|||
|
|
|
|||
|
|
return distance
|
|||
|
|
|
|||
|
|
# ============================================
|
|||
|
|
# 构建距离矩阵
|
|||
|
|
# ============================================
|
|||
|
|
n = len(sites)
|
|||
|
|
distance_matrix = np.zeros((n, n))
|
|||
|
|
|
|||
|
|
for i in range(n):
|
|||
|
|
for j in range(n):
|
|||
|
|
if i != j:
|
|||
|
|
distance_matrix[i, j] = calc_distance(
|
|||
|
|
sites.iloc[i]['lat'], sites.iloc[i]['lon'],
|
|||
|
|
sites.iloc[j]['lat'], sites.iloc[j]['lon']
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
# 转换为DataFrame
|
|||
|
|
site_ids = sites['site_id'].values
|
|||
|
|
df_distance = pd.DataFrame(distance_matrix, index=site_ids, columns=site_ids)
|
|||
|
|
|
|||
|
|
# ============================================
|
|||
|
|
# 统计信息
|
|||
|
|
# ============================================
|
|||
|
|
# 提取上三角(排除对角线)
|
|||
|
|
upper_tri = distance_matrix[np.triu_indices(n, k=1)]
|
|||
|
|
|
|||
|
|
print(f"\n距离矩阵统计:")
|
|||
|
|
print(f" - 站点对总数: {len(upper_tri)}")
|
|||
|
|
print(f" - 最小距离: {upper_tri.min():.2f} 英里")
|
|||
|
|
print(f" - 最大距离: {upper_tri.max():.2f} 英里")
|
|||
|
|
print(f" - 平均距离: {upper_tri.mean():.2f} 英里")
|
|||
|
|
print(f" - 中位数距离: {np.median(upper_tri):.2f} 英里")
|
|||
|
|
|
|||
|
|
# 按阈值统计
|
|||
|
|
thresholds = [30, 40, 50, 60, 70]
|
|||
|
|
print(f"\n距离阈值统计:")
|
|||
|
|
for th in thresholds:
|
|||
|
|
count = np.sum(upper_tri <= th)
|
|||
|
|
print(f" - ≤ {th} 英里: {count} 对 ({count/len(upper_tri)*100:.1f}%)")
|
|||
|
|
|
|||
|
|
# ============================================
|
|||
|
|
# 保存结果
|
|||
|
|
# ============================================
|
|||
|
|
with pd.ExcelWriter(OUTPUT_FILE, engine='openpyxl') as writer:
|
|||
|
|
# Sheet 1: 站点信息
|
|||
|
|
sites.to_excel(writer, sheet_name='sites', index=False)
|
|||
|
|
|
|||
|
|
# Sheet 2: 距离矩阵
|
|||
|
|
df_distance.to_excel(writer, sheet_name='distance_matrix')
|
|||
|
|
|
|||
|
|
# Sheet 3: 距离统计
|
|||
|
|
stats = pd.DataFrame({
|
|||
|
|
'metric': ['min', 'max', 'mean', 'median', 'std', 'total_pairs'],
|
|||
|
|
'value': [upper_tri.min(), upper_tri.max(), upper_tri.mean(),
|
|||
|
|
np.median(upper_tri), upper_tri.std(), len(upper_tri)]
|
|||
|
|
})
|
|||
|
|
stats.to_excel(writer, sheet_name='statistics', index=False)
|
|||
|
|
|
|||
|
|
print(f"\n结果已保存至: {OUTPUT_FILE}")
|
|||
|
|
print(" - Sheet 'sites': 站点信息")
|
|||
|
|
print(" - Sheet 'distance_matrix': 70×70距离矩阵")
|
|||
|
|
print(" - Sheet 'statistics': 距离统计")
|
|||
|
|
print("\n" + "=" * 60)
|