"""
Task 3 - Step 1: pairwise distance matrix
=========================================

Input:  ../task1/03_allocate.xlsx  (one row per site, including coordinates;
                                    70 sites in the original data set)
Output: 01_distance.xlsx           (site table, n x n distance matrix,
                                    summary statistics)

Distance formula (equirectangular approximation, suitable for a small region):

    l_ij = 69.0 * sqrt((lat_i - lat_j)^2
                       + cos^2(lat_avg * pi / 180) * (lon_i - lon_j)^2)

with lat_avg = (lat_i + lat_j) / 2 and all angles given in degrees.
Unit: miles.
"""

import pandas as pd
import numpy as np

# ============================================
# Parameters
# ============================================
# Both paths are resolved against the current working directory.
INPUT_FILE = '../task1/03_allocate.xlsx'
OUTPUT_FILE = '01_distance.xlsx'


# ============================================
# Distance helpers
# ============================================
def calc_distance(lat1, lon1, lat2, lon2):
    """Return the approximate distance in miles between two points.

    Uses the equirectangular approximation: one degree of latitude is taken
    as 69.0 miles and the longitude difference is scaled by the cosine of
    the mean latitude.

    Args:
        lat1, lon1: Latitude / longitude of the first point, in degrees.
        lat2, lon2: Latitude / longitude of the second point, in degrees.

    Returns:
        The distance in miles. The arguments may be scalars or numpy arrays;
        arrays are combined with the usual numpy broadcasting rules and the
        result has the broadcast shape.
    """
    lat_avg = (lat1 + lat2) / 2
    lat_avg_rad = np.radians(lat_avg)

    delta_lat = lat1 - lat2
    delta_lon = lon1 - lon2

    # 69.0 miles per degree of latitude; cos(mean latitude) shrinks the
    # longitude difference accordingly.
    distance = 69.0 * np.sqrt(delta_lat**2 + (np.cos(lat_avg_rad) * delta_lon)**2)

    return distance


def build_distance_matrix(lat, lon):
    """Return the symmetric n x n matrix of pairwise distances in miles.

    Args:
        lat: 1-D sequence of latitudes in degrees, one per site.
        lon: 1-D sequence of longitudes in degrees, same length as ``lat``.

    Returns:
        A float ndarray of shape (n, n) whose entry [i, j] is
        ``calc_distance(lat[i], lon[i], lat[j], lon[j])``. The diagonal is
        always exactly 0.
    """
    lat = np.asarray(lat, dtype=float)
    lon = np.asarray(lon, dtype=float)

    # Column vector against row vector broadcasts to every (i, j) pair in a
    # single call instead of a Python-level double loop.
    matrix = calc_distance(lat[:, None], lon[:, None], lat[None, :], lon[None, :])

    # A site is at distance 0 from itself even when its coordinates are NaN.
    np.fill_diagonal(matrix, 0.0)
    return matrix


# ============================================
# Script entry point
# ============================================
def main():
    """Load the site table, compute the distance matrix, report and save it.

    Reads INPUT_FILE, prints summary statistics to stdout and writes the
    sheets 'sites', 'distance_matrix' and 'statistics' to OUTPUT_FILE.

    Raises:
        ValueError: If the input contains fewer than two sites, because no
            pairwise statistics can be computed in that case.
    """
    # ---------------- Load data ----------------
    print("=" * 60)
    print("Task 3 - Step 1: 距离矩阵计算")
    print("=" * 60)

    df = pd.read_excel(INPUT_FILE)
    print(f"\n读取站点数据: {len(df)} 个站点")
    print(f"列名: {df.columns.tolist()}")

    # Keep only the columns that are carried over to the output workbook.
    sites = df[['site_id', 'site_name', 'lat', 'lon', 'mu', 'sigma', 'mu_tilde', 'k']].copy()
    print("\n站点数据概览:")
    print(sites.head())

    n = len(sites)
    if n < 2:
        raise ValueError(
            f"need at least 2 sites to compute pairwise distances, got {n}"
        )

    # ---------------- Build the distance matrix ----------------
    distance_matrix = build_distance_matrix(
        sites['lat'].to_numpy(), sites['lon'].to_numpy()
    )

    # Label rows and columns with the site ids.
    site_ids = sites['site_id'].values
    df_distance = pd.DataFrame(distance_matrix, index=site_ids, columns=site_ids)

    # ---------------- Statistics ----------------
    # Strict upper triangle: every unordered pair once, diagonal excluded.
    upper_tri = distance_matrix[np.triu_indices(n, k=1)]

    print("\n距离矩阵统计:")
    print(f" - 站点对总数: {len(upper_tri)}")
    print(f" - 最小距离: {upper_tri.min():.2f} 英里")
    print(f" - 最大距离: {upper_tri.max():.2f} 英里")
    print(f" - 平均距离: {upper_tri.mean():.2f} 英里")
    print(f" - 中位数距离: {np.median(upper_tri):.2f} 英里")

    # Number (and share) of site pairs within each distance threshold.
    thresholds = [30, 40, 50, 60, 70]
    print("\n距离阈值统计:")
    for th in thresholds:
        count = np.sum(upper_tri <= th)
        print(f" - ≤ {th} 英里: {count} 对 ({count/len(upper_tri)*100:.1f}%)")

    # ---------------- Save results ----------------
    with pd.ExcelWriter(OUTPUT_FILE, engine='openpyxl') as writer:
        # Sheet 1: site information
        sites.to_excel(writer, sheet_name='sites', index=False)

        # Sheet 2: distance matrix
        df_distance.to_excel(writer, sheet_name='distance_matrix')

        # Sheet 3: distance statistics
        stats = pd.DataFrame({
            'metric': ['min', 'max', 'mean', 'median', 'std', 'total_pairs'],
            'value': [
                upper_tri.min(),
                upper_tri.max(),
                upper_tri.mean(),
                np.median(upper_tri),
                upper_tri.std(),
                len(upper_tri),
            ],
        })
        stats.to_excel(writer, sheet_name='statistics', index=False)

    print(f"\n结果已保存至: {OUTPUT_FILE}")
    print(" - Sheet 'sites': 站点信息")
    # Report the actual matrix size instead of a hard-coded 70×70.
    print(f" - Sheet 'distance_matrix': {n}×{n}距离矩阵")
    print(" - Sheet 'statistics': 距离统计")
    print("\n" + "=" * 60)


if __name__ == "__main__":
    main()