diff --git a/analyze_visits.py b/analyze_visits.py deleted file mode 100644 index 5f62561..0000000 --- a/analyze_visits.py +++ /dev/null @@ -1,135 +0,0 @@ -""" -分析:访问总次数是否由每次访问平均需求量决定 -使用相关性分析和回归分析 -""" -import pandas as pd -import numpy as np -from scipy import stats -import matplotlib.pyplot as plt - -# 读取数据 -df = pd.read_excel('prob/MFP Regular Sites 2019.xlsx') - -# 提取关键列 -visits = df['Number of Visits in 2019'] -avg_demand = df['Average Demand per Visit'] -std_demand = df['StDev(Demand per Visit)'] - -print("=" * 60) -print("数据基本统计") -print("=" * 60) -print(f"样本数量: {len(visits)}") -print(f"\n访问总次数:") -print(f" 均值: {visits.mean():.2f}, 标准差: {visits.std():.2f}") -print(f"\n每次访问平均需求量:") -print(f" 均值: {avg_demand.mean():.2f}, 标准差: {avg_demand.std():.2f}") - -# 1. 皮尔逊相关系数分析 -print("\n" + "=" * 60) -print("1. 皮尔逊相关系数分析") -print("=" * 60) -r, p_value = stats.pearsonr(avg_demand, visits) -print(f"相关系数 r = {r:.4f}") -print(f"p值 = {p_value:.4e}") -print(f"决定系数 R² = {r**2:.4f} (可解释{r**2*100:.1f}%的变异)") - -if p_value < 0.05: - print("结论: p < 0.05, 相关性显著") -else: - print("结论: p >= 0.05, 相关性不显著") - -# 2. 线性回归分析 -print("\n" + "=" * 60) -print("2. 线性回归分析 (访问次数 ~ 平均需求量)") -print("=" * 60) -slope, intercept, r_val, p_val, std_err = stats.linregress(avg_demand, visits) -print(f"回归方程: 访问次数 = {slope:.4f} × 平均需求量 + {intercept:.4f}") -print(f"斜率标准误: {std_err:.4f}") -print(f"p值: {p_val:.4e}") - -# 3. 标准差作为辅助分析 -print("\n" + "=" * 60) -print("3. 标准差辅助分析") -print("=" * 60) -# 变异系数 (CV) = 标准差/均值, 衡量相对离散程度 -cv = std_demand / avg_demand -print(f"变异系数 (CV = 标准差/均值) 统计:") -print(f" 均值: {cv.mean():.4f}") -print(f" 范围: {cv.min():.4f} - {cv.max():.4f}") - -# 标准差与访问次数的相关性 -r_std, p_std = stats.pearsonr(std_demand.dropna(), visits[std_demand.notna()]) -print(f"\n标准差与访问次数的相关系数: r = {r_std:.4f}, p = {p_std:.4e}") - -# 4. 多元回归 (平均需求量 + 标准差 -> 访问次数) -print("\n" + "=" * 60) -print("4. 多元回归分析 (同时考虑平均需求量和标准差)") -print("=" * 60) -from sklearn.linear_model import LinearRegression -from sklearn.preprocessing import StandardScaler - -# 准备数据 (去除缺失值) -mask = std_demand.notna() -X = np.column_stack([avg_demand[mask], std_demand[mask]]) -y = visits[mask] - -model = LinearRegression() -model.fit(X, y) -y_pred = model.predict(X) -ss_res = np.sum((y - y_pred) ** 2) -ss_tot = np.sum((y - y.mean()) ** 2) -r2_multi = 1 - ss_res / ss_tot - -print(f"多元 R² = {r2_multi:.4f} (可解释{r2_multi*100:.1f}%的变异)") -print(f"系数: 平均需求量 = {model.coef_[0]:.4f}, 标准差 = {model.coef_[1]:.4f}") -print(f"截距: {model.intercept_:.4f}") - -# 5. 总结 -print("\n" + "=" * 60) -print("综合结论") -print("=" * 60) -if abs(r) < 0.3: - strength = "弱" -elif abs(r) < 0.7: - strength = "中等" -else: - strength = "强" - -direction = "正" if r > 0 else "负" -print(f"• 平均需求量与访问次数呈{strength}{direction}相关 (r={r:.3f})") -print(f"• 平均需求量仅能解释访问次数{r**2*100:.1f}%的变异") -print(f"• 加入标准差后可解释{r2_multi*100:.1f}%的变异") - -if r**2 < 0.25: - print("• 结论: 访问总次数主要不由每次访问平均需求量决定") -else: - print("• 结论: 每次访问平均需求量对访问总次数有较大影响") - -# 绘图 -fig, axes = plt.subplots(1, 2, figsize=(12, 5)) - -# 散点图 + 回归线 -ax1 = axes[0] -ax1.scatter(avg_demand, visits, alpha=0.6, edgecolors='black', linewidth=0.5) -x_line = np.linspace(avg_demand.min(), avg_demand.max(), 100) -y_line = slope * x_line + intercept -ax1.plot(x_line, y_line, 'r-', linewidth=2, label=f'回归线 (R²={r**2:.3f})') -ax1.set_xlabel('Average Demand per Visit (每次访问平均需求量)') -ax1.set_ylabel('Number of Visits (访问总次数)') -ax1.set_title('访问次数 vs 平均需求量') -ax1.legend() -ax1.grid(True, alpha=0.3) - -# 残差图 -ax2 = axes[1] -residuals = visits - (slope * avg_demand + intercept) -ax2.scatter(avg_demand, residuals, alpha=0.6, edgecolors='black', linewidth=0.5) -ax2.axhline(y=0, color='r', linestyle='--', linewidth=2) -ax2.set_xlabel('Average Demand per Visit (每次访问平均需求量)') -ax2.set_ylabel('Residuals (残差)') -ax2.set_title('残差分析') -ax2.grid(True, alpha=0.3) - -plt.tight_layout() -plt.savefig('analysis_result.png', dpi=150, bbox_inches='tight') -print("\n图表已保存至 analysis_result.png") diff --git a/kmin_effectiveness.png b/kmin_effectiveness.png new file mode 100644 index 0000000..da6da19 Binary files /dev/null and b/kmin_effectiveness.png differ diff --git a/kmin_effectiveness.py b/kmin_effectiveness.py new file mode 100644 index 0000000..ec49942 --- /dev/null +++ b/kmin_effectiveness.py @@ -0,0 +1,263 @@ +""" +K_min-有效性分析 +k_min为实数:如2.7表示70%站点最低2次,30%站点最低3次 +""" + +import numpy as np +import pandas as pd + +try: + import matplotlib + + # 允许在无GUI环境生成图片 + matplotlib.use("Agg") + import matplotlib.pyplot as plt + + _HAS_MPL = True +except ModuleNotFoundError: + plt = None + _HAS_MPL = False + +# 参数 +C_OPT = 250 +N_TOTAL = 722 +ALPHA = 0.4 +BETA = 0.2 + + +def load_data(): + """加载数据""" + df = pd.read_excel('prob/MFP Regular Sites 2019.xlsx') + df = df.drop(columns=['Unnamed: 10', 'Demand per Visit == the number of clients serviced on that visit']) + df['TotalDemand'] = df['Average Demand per Visit'] * df['Number of Visits in 2019'] + return df.sort_values('TotalDemand').reset_index(drop=True) + + +def allocate_visits(df, k_min_real, n_total, c_opt): + """ + 根据实数k_min分配访问次数 + + k_min=2.7: floor=2, ceil=3, 70%站点得2次, 30%站点得3次 + 低需求站点优先获得较低的k_min(已按TotalDemand排序) + """ + df = df.copy() + n = len(df) + + k_floor = int(np.floor(k_min_real)) + k_ceil = int(np.ceil(k_min_real)) + frac = k_min_real - k_floor # 获得ceil的比例 + + # 分配最低次数:低需求站点得floor,高需求站点得ceil + n_ceil = int(round(n * frac)) + n_floor = n - n_ceil + + k_base = np.array([k_floor] * n_floor + [k_ceil] * n_ceil) + df['K_base'] = k_base + + # 计算剩余运力 + n_reserved = k_base.sum() + n_free = n_total - n_reserved + + if n_free < 0: + return None + + # 按需求权重分配剩余运力 + df['Weight'] = df['TotalDemand'] / df['TotalDemand'].sum() + df['AllocatedVisits'] = (k_base + n_free * df['Weight'].values).round().astype(int) + df['AllocatedVisits'] = np.maximum(df['AllocatedVisits'], k_base) + + # 调整总数 + diff = n_total - df['AllocatedVisits'].sum() + if diff != 0: + sorted_idx = df['Weight'].sort_values(ascending=(diff < 0)).index.tolist() + for idx in sorted_idx[:abs(diff)]: + df.loc[idx, 'AllocatedVisits'] += int(np.sign(diff)) + + return df + + +def calc_effectiveness(df, c_opt=C_OPT, alpha=ALPHA, beta=BETA): + """计算有效性指标""" + d = df['Average Demand per Visit'].values + k = df['AllocatedVisits'].values + D = df['TotalDemand'].values + + # 截断:单次有效服务 = min(需求, 容量) + eff_per_visit = np.minimum(d, c_opt) + annual_eff = k * eff_per_visit + + # 缺货率、浪费率 + unmet = np.maximum(0, D - annual_eff) / np.maximum(D, 1) + waste = np.maximum(0, k * c_opt - D) / np.maximum(k * c_opt, 1) + + # 有效性得分 + base = annual_eff / np.maximum(D, 1) + score = np.clip(base - alpha * unmet - beta * waste, 0, 1) + + n = score.size + bottom_n = max(1, int(np.ceil(n * 0.10))) + bottom10_mean = float(np.sort(score)[:bottom_n].mean()) + + # 总服务客户数 = Σ min(供给能力, 需求) + total_served = np.minimum(k * c_opt, D).sum() + total_demand = D.sum() + + return { + 'mean': score.mean(), + 'min': score.min(), + 'bottom10_mean': bottom10_mean, + 'std': score.std(), + 'total_unmet': (D - annual_eff).clip(min=0).sum(), + 'total_waste': (k * c_opt - D).clip(min=0).sum(), + 'total_served': total_served, + 'total_demand': total_demand, + 'serve_ratio': total_served / total_demand + } + + +def analyze_kmin_range(df, k_range=np.arange(1.0, 10.1, 0.1)): + """扫描k_min范围,计算有效性""" + results = [] + + for k_min in k_range: + df_alloc = allocate_visits(df, k_min, N_TOTAL, C_OPT) + if df_alloc is None: + continue + + metrics = calc_effectiveness(df_alloc) + results.append({ + 'k_min': k_min, + 'effectiveness': metrics['mean'], + 'min_eff': metrics['min'], + 'bottom10_eff': metrics['bottom10_mean'], + 'std_eff': metrics['std'], + 'unmet': metrics['total_unmet'], + 'waste': metrics['total_waste'], + 'total_served': metrics['total_served'], + 'total_demand': metrics['total_demand'], + 'serve_ratio': metrics['serve_ratio'] + }) + + return pd.DataFrame(results) + + +def plot_results(results): + """绘制k_min-有效性曲线""" + if not _HAS_MPL: + raise RuntimeError("缺少依赖: matplotlib(无法绘图)。请先安装 matplotlib 再运行绘图部分。") + + fig, axes = plt.subplots(3, 2, figsize=(12, 10)) + + # 找最优点 + best_idx = results['effectiveness'].idxmax() + best_k = results.loc[best_idx, 'k_min'] + best_eff = results.loc[best_idx, 'effectiveness'] + + # 1. 有效性均值 + ax = axes[0, 0] + ax.plot(results['k_min'], results['effectiveness'], 'b-', lw=2) + ax.axvline(best_k, color='r', ls='--', label=f'Best k_min={best_k:.1f}') + ax.scatter([best_k], [best_eff], c='r', s=100, zorder=5) + ax.set_xlabel('k_min') + ax.set_ylabel('Mean Effectiveness') + ax.set_title('Mean Effectiveness vs k_min') + ax.legend() + ax.grid(True, alpha=0.3) + + # 2. 最低10%有效性均值 + ax = axes[0, 1] + ax.plot(results['k_min'], results['bottom10_eff'], 'm-', lw=2) + ax.axvline(best_k, color='r', ls='--') + ax.set_xlabel('k_min') + ax.set_ylabel('Bottom 10% Mean Effectiveness') + ax.set_title('Bottom 10% Mean Effectiveness vs k_min') + ax.grid(True, alpha=0.3) + + # 3. 总服务客户数 + ax = axes[1, 0] + ax.plot(results['k_min'], results['total_served'] / 1000, 'c-', lw=2) + ax.axhline(results['total_demand'].iloc[0] / 1000, color='gray', ls=':', label='Total Demand') + ax.axvline(best_k, color='r', ls='--') + ax.set_xlabel('k_min') + ax.set_ylabel('Served Families (×1000)') + ax.set_title('Total Served vs k_min') + ax.legend() + ax.grid(True, alpha=0.3) + + # 4. 最小有效性 + ax = axes[1, 1] + ax.plot(results['k_min'], results['min_eff'], 'g-', lw=2) + ax.axvline(best_k, color='r', ls='--') + ax.set_xlabel('k_min') + ax.set_ylabel('Min Effectiveness') + ax.set_title('Worst Site Effectiveness vs k_min') + ax.grid(True, alpha=0.3) + + # 5. 未满足需求 vs 浪费 + ax = axes[2, 0] + ax.plot(results['k_min'], results['unmet'] / 1000, 'r-', lw=2, label='Unmet') + ax.plot(results['k_min'], results['waste'] / 1000, 'b-', lw=2, label='Waste') + ax.axvline(best_k, color='gray', ls='--') + ax.set_xlabel('k_min') + ax.set_ylabel('Families (×1000)') + ax.set_title('Unmet Demand vs Wasted Capacity') + ax.legend() + ax.grid(True, alpha=0.3) + + # 6. 有效性标准差 + ax = axes[2, 1] + ax.plot(results['k_min'], results['std_eff'], color='tab:orange', lw=2) + ax.axvline(best_k, color='gray', ls='--') + ax.set_xlabel('k_min') + ax.set_ylabel('Std Effectiveness') + ax.set_title('Effectiveness Std vs k_min') + ax.grid(True, alpha=0.3) + + plt.tight_layout() + plt.savefig('kmin_effectiveness.png', dpi=150) + plt.close(fig) + + return best_k, best_eff + + +def main(): + # 1. 加载数据 + df = load_data() + print(f"站点数: {len(df)}, 总运力: {N_TOTAL}") + print(f"总需求: {df['TotalDemand'].sum():,.0f} 家庭次") + + # 2. 扫描k_min + print("\n扫描 k_min ∈ [1.0, 10.0]...") + results = analyze_kmin_range(df) + + # 3. 绘图 + best_idx = results['effectiveness'].idxmax() + best_k = results.loc[best_idx, 'k_min'] + best_eff = results.loc[best_idx, 'effectiveness'] + if _HAS_MPL: + plot_results(results) + else: + print("\n未检测到 matplotlib,跳过绘图(仍会保存CSV结果)。") + + # 4. 输出最优结果 + print(f"\n最优 k_min = {best_k:.1f}") + print(f"最优有效性 = {best_eff:.4f}") + + # 5. 生成最优方案 + df_opt = allocate_visits(df, best_k, N_TOTAL, C_OPT) + metrics = calc_effectiveness(df_opt) + + print(f"\n最优方案统计:") + print(f" 有效性: {metrics['mean']:.4f} (min={metrics['min']:.4f})") + print(f" 总服务: {metrics['total_served']:,.0f} / {metrics['total_demand']:,.0f} ({metrics['serve_ratio']:.1%})") + print(f" 未满足: {metrics['total_unmet']:,.0f} 家庭次") + print(f" 浪费: {metrics['total_waste']:,.0f} 家庭次") + print(f" 访问次数: [{df_opt['AllocatedVisits'].min()}, {df_opt['AllocatedVisits'].max()}]") + + # 6. 保存 + results.to_csv('kmin_effectiveness_data.csv', index=False) + print("\n已保存: kmin_effectiveness.png, kmin_effectiveness_data.csv") + + +if __name__ == '__main__': + main()