P1: first upload
This commit is contained in:
@@ -1,135 +0,0 @@
|
||||
"""
|
||||
分析:访问总次数是否由每次访问平均需求量决定
|
||||
使用相关性分析和回归分析
|
||||
"""
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from scipy import stats
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
# 读取数据
|
||||
df = pd.read_excel('prob/MFP Regular Sites 2019.xlsx')
|
||||
|
||||
# 提取关键列
|
||||
visits = df['Number of Visits in 2019']
|
||||
avg_demand = df['Average Demand per Visit']
|
||||
std_demand = df['StDev(Demand per Visit)']
|
||||
|
||||
print("=" * 60)
|
||||
print("数据基本统计")
|
||||
print("=" * 60)
|
||||
print(f"样本数量: {len(visits)}")
|
||||
print(f"\n访问总次数:")
|
||||
print(f" 均值: {visits.mean():.2f}, 标准差: {visits.std():.2f}")
|
||||
print(f"\n每次访问平均需求量:")
|
||||
print(f" 均值: {avg_demand.mean():.2f}, 标准差: {avg_demand.std():.2f}")
|
||||
|
||||
# 1. 皮尔逊相关系数分析
|
||||
print("\n" + "=" * 60)
|
||||
print("1. 皮尔逊相关系数分析")
|
||||
print("=" * 60)
|
||||
r, p_value = stats.pearsonr(avg_demand, visits)
|
||||
print(f"相关系数 r = {r:.4f}")
|
||||
print(f"p值 = {p_value:.4e}")
|
||||
print(f"决定系数 R² = {r**2:.4f} (可解释{r**2*100:.1f}%的变异)")
|
||||
|
||||
if p_value < 0.05:
|
||||
print("结论: p < 0.05, 相关性显著")
|
||||
else:
|
||||
print("结论: p >= 0.05, 相关性不显著")
|
||||
|
||||
# 2. 线性回归分析
|
||||
print("\n" + "=" * 60)
|
||||
print("2. 线性回归分析 (访问次数 ~ 平均需求量)")
|
||||
print("=" * 60)
|
||||
slope, intercept, r_val, p_val, std_err = stats.linregress(avg_demand, visits)
|
||||
print(f"回归方程: 访问次数 = {slope:.4f} × 平均需求量 + {intercept:.4f}")
|
||||
print(f"斜率标准误: {std_err:.4f}")
|
||||
print(f"p值: {p_val:.4e}")
|
||||
|
||||
# 3. 标准差作为辅助分析
|
||||
print("\n" + "=" * 60)
|
||||
print("3. 标准差辅助分析")
|
||||
print("=" * 60)
|
||||
# 变异系数 (CV) = 标准差/均值, 衡量相对离散程度
|
||||
cv = std_demand / avg_demand
|
||||
print(f"变异系数 (CV = 标准差/均值) 统计:")
|
||||
print(f" 均值: {cv.mean():.4f}")
|
||||
print(f" 范围: {cv.min():.4f} - {cv.max():.4f}")
|
||||
|
||||
# 标准差与访问次数的相关性
|
||||
r_std, p_std = stats.pearsonr(std_demand.dropna(), visits[std_demand.notna()])
|
||||
print(f"\n标准差与访问次数的相关系数: r = {r_std:.4f}, p = {p_std:.4e}")
|
||||
|
||||
# 4. 多元回归 (平均需求量 + 标准差 -> 访问次数)
|
||||
print("\n" + "=" * 60)
|
||||
print("4. 多元回归分析 (同时考虑平均需求量和标准差)")
|
||||
print("=" * 60)
|
||||
from sklearn.linear_model import LinearRegression
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
|
||||
# 准备数据 (去除缺失值)
|
||||
mask = std_demand.notna()
|
||||
X = np.column_stack([avg_demand[mask], std_demand[mask]])
|
||||
y = visits[mask]
|
||||
|
||||
model = LinearRegression()
|
||||
model.fit(X, y)
|
||||
y_pred = model.predict(X)
|
||||
ss_res = np.sum((y - y_pred) ** 2)
|
||||
ss_tot = np.sum((y - y.mean()) ** 2)
|
||||
r2_multi = 1 - ss_res / ss_tot
|
||||
|
||||
print(f"多元 R² = {r2_multi:.4f} (可解释{r2_multi*100:.1f}%的变异)")
|
||||
print(f"系数: 平均需求量 = {model.coef_[0]:.4f}, 标准差 = {model.coef_[1]:.4f}")
|
||||
print(f"截距: {model.intercept_:.4f}")
|
||||
|
||||
# 5. 总结
|
||||
print("\n" + "=" * 60)
|
||||
print("综合结论")
|
||||
print("=" * 60)
|
||||
if abs(r) < 0.3:
|
||||
strength = "弱"
|
||||
elif abs(r) < 0.7:
|
||||
strength = "中等"
|
||||
else:
|
||||
strength = "强"
|
||||
|
||||
direction = "正" if r > 0 else "负"
|
||||
print(f"• 平均需求量与访问次数呈{strength}{direction}相关 (r={r:.3f})")
|
||||
print(f"• 平均需求量仅能解释访问次数{r**2*100:.1f}%的变异")
|
||||
print(f"• 加入标准差后可解释{r2_multi*100:.1f}%的变异")
|
||||
|
||||
if r**2 < 0.25:
|
||||
print("• 结论: 访问总次数主要不由每次访问平均需求量决定")
|
||||
else:
|
||||
print("• 结论: 每次访问平均需求量对访问总次数有较大影响")
|
||||
|
||||
# 绘图
|
||||
fig, axes = plt.subplots(1, 2, figsize=(12, 5))
|
||||
|
||||
# 散点图 + 回归线
|
||||
ax1 = axes[0]
|
||||
ax1.scatter(avg_demand, visits, alpha=0.6, edgecolors='black', linewidth=0.5)
|
||||
x_line = np.linspace(avg_demand.min(), avg_demand.max(), 100)
|
||||
y_line = slope * x_line + intercept
|
||||
ax1.plot(x_line, y_line, 'r-', linewidth=2, label=f'回归线 (R²={r**2:.3f})')
|
||||
ax1.set_xlabel('Average Demand per Visit (每次访问平均需求量)')
|
||||
ax1.set_ylabel('Number of Visits (访问总次数)')
|
||||
ax1.set_title('访问次数 vs 平均需求量')
|
||||
ax1.legend()
|
||||
ax1.grid(True, alpha=0.3)
|
||||
|
||||
# 残差图
|
||||
ax2 = axes[1]
|
||||
residuals = visits - (slope * avg_demand + intercept)
|
||||
ax2.scatter(avg_demand, residuals, alpha=0.6, edgecolors='black', linewidth=0.5)
|
||||
ax2.axhline(y=0, color='r', linestyle='--', linewidth=2)
|
||||
ax2.set_xlabel('Average Demand per Visit (每次访问平均需求量)')
|
||||
ax2.set_ylabel('Residuals (残差)')
|
||||
ax2.set_title('残差分析')
|
||||
ax2.grid(True, alpha=0.3)
|
||||
|
||||
plt.tight_layout()
|
||||
plt.savefig('analysis_result.png', dpi=150, bbox_inches='tight')
|
||||
print("\n图表已保存至 analysis_result.png")
|
||||
BIN
kmin_effectiveness.png
Normal file
BIN
kmin_effectiveness.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 196 KiB |
263
kmin_effectiveness.py
Normal file
263
kmin_effectiveness.py
Normal file
@@ -0,0 +1,263 @@
|
||||
"""
|
||||
K_min-有效性分析
|
||||
k_min为实数:如2.7表示70%站点最低2次,30%站点最低3次
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
try:
|
||||
import matplotlib
|
||||
|
||||
# 允许在无GUI环境生成图片
|
||||
matplotlib.use("Agg")
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
_HAS_MPL = True
|
||||
except ModuleNotFoundError:
|
||||
plt = None
|
||||
_HAS_MPL = False
|
||||
|
||||
# 参数
|
||||
C_OPT = 250
|
||||
N_TOTAL = 722
|
||||
ALPHA = 0.4
|
||||
BETA = 0.2
|
||||
|
||||
|
||||
def load_data():
|
||||
"""加载数据"""
|
||||
df = pd.read_excel('prob/MFP Regular Sites 2019.xlsx')
|
||||
df = df.drop(columns=['Unnamed: 10', 'Demand per Visit == the number of clients serviced on that visit'])
|
||||
df['TotalDemand'] = df['Average Demand per Visit'] * df['Number of Visits in 2019']
|
||||
return df.sort_values('TotalDemand').reset_index(drop=True)
|
||||
|
||||
|
||||
def allocate_visits(df, k_min_real, n_total, c_opt):
|
||||
"""
|
||||
根据实数k_min分配访问次数
|
||||
|
||||
k_min=2.7: floor=2, ceil=3, 70%站点得2次, 30%站点得3次
|
||||
低需求站点优先获得较低的k_min(已按TotalDemand排序)
|
||||
"""
|
||||
df = df.copy()
|
||||
n = len(df)
|
||||
|
||||
k_floor = int(np.floor(k_min_real))
|
||||
k_ceil = int(np.ceil(k_min_real))
|
||||
frac = k_min_real - k_floor # 获得ceil的比例
|
||||
|
||||
# 分配最低次数:低需求站点得floor,高需求站点得ceil
|
||||
n_ceil = int(round(n * frac))
|
||||
n_floor = n - n_ceil
|
||||
|
||||
k_base = np.array([k_floor] * n_floor + [k_ceil] * n_ceil)
|
||||
df['K_base'] = k_base
|
||||
|
||||
# 计算剩余运力
|
||||
n_reserved = k_base.sum()
|
||||
n_free = n_total - n_reserved
|
||||
|
||||
if n_free < 0:
|
||||
return None
|
||||
|
||||
# 按需求权重分配剩余运力
|
||||
df['Weight'] = df['TotalDemand'] / df['TotalDemand'].sum()
|
||||
df['AllocatedVisits'] = (k_base + n_free * df['Weight'].values).round().astype(int)
|
||||
df['AllocatedVisits'] = np.maximum(df['AllocatedVisits'], k_base)
|
||||
|
||||
# 调整总数
|
||||
diff = n_total - df['AllocatedVisits'].sum()
|
||||
if diff != 0:
|
||||
sorted_idx = df['Weight'].sort_values(ascending=(diff < 0)).index.tolist()
|
||||
for idx in sorted_idx[:abs(diff)]:
|
||||
df.loc[idx, 'AllocatedVisits'] += int(np.sign(diff))
|
||||
|
||||
return df
|
||||
|
||||
|
||||
def calc_effectiveness(df, c_opt=C_OPT, alpha=ALPHA, beta=BETA):
|
||||
"""计算有效性指标"""
|
||||
d = df['Average Demand per Visit'].values
|
||||
k = df['AllocatedVisits'].values
|
||||
D = df['TotalDemand'].values
|
||||
|
||||
# 截断:单次有效服务 = min(需求, 容量)
|
||||
eff_per_visit = np.minimum(d, c_opt)
|
||||
annual_eff = k * eff_per_visit
|
||||
|
||||
# 缺货率、浪费率
|
||||
unmet = np.maximum(0, D - annual_eff) / np.maximum(D, 1)
|
||||
waste = np.maximum(0, k * c_opt - D) / np.maximum(k * c_opt, 1)
|
||||
|
||||
# 有效性得分
|
||||
base = annual_eff / np.maximum(D, 1)
|
||||
score = np.clip(base - alpha * unmet - beta * waste, 0, 1)
|
||||
|
||||
n = score.size
|
||||
bottom_n = max(1, int(np.ceil(n * 0.10)))
|
||||
bottom10_mean = float(np.sort(score)[:bottom_n].mean())
|
||||
|
||||
# 总服务客户数 = Σ min(供给能力, 需求)
|
||||
total_served = np.minimum(k * c_opt, D).sum()
|
||||
total_demand = D.sum()
|
||||
|
||||
return {
|
||||
'mean': score.mean(),
|
||||
'min': score.min(),
|
||||
'bottom10_mean': bottom10_mean,
|
||||
'std': score.std(),
|
||||
'total_unmet': (D - annual_eff).clip(min=0).sum(),
|
||||
'total_waste': (k * c_opt - D).clip(min=0).sum(),
|
||||
'total_served': total_served,
|
||||
'total_demand': total_demand,
|
||||
'serve_ratio': total_served / total_demand
|
||||
}
|
||||
|
||||
|
||||
def analyze_kmin_range(df, k_range=np.arange(1.0, 10.1, 0.1)):
|
||||
"""扫描k_min范围,计算有效性"""
|
||||
results = []
|
||||
|
||||
for k_min in k_range:
|
||||
df_alloc = allocate_visits(df, k_min, N_TOTAL, C_OPT)
|
||||
if df_alloc is None:
|
||||
continue
|
||||
|
||||
metrics = calc_effectiveness(df_alloc)
|
||||
results.append({
|
||||
'k_min': k_min,
|
||||
'effectiveness': metrics['mean'],
|
||||
'min_eff': metrics['min'],
|
||||
'bottom10_eff': metrics['bottom10_mean'],
|
||||
'std_eff': metrics['std'],
|
||||
'unmet': metrics['total_unmet'],
|
||||
'waste': metrics['total_waste'],
|
||||
'total_served': metrics['total_served'],
|
||||
'total_demand': metrics['total_demand'],
|
||||
'serve_ratio': metrics['serve_ratio']
|
||||
})
|
||||
|
||||
return pd.DataFrame(results)
|
||||
|
||||
|
||||
def plot_results(results):
|
||||
"""绘制k_min-有效性曲线"""
|
||||
if not _HAS_MPL:
|
||||
raise RuntimeError("缺少依赖: matplotlib(无法绘图)。请先安装 matplotlib 再运行绘图部分。")
|
||||
|
||||
fig, axes = plt.subplots(3, 2, figsize=(12, 10))
|
||||
|
||||
# 找最优点
|
||||
best_idx = results['effectiveness'].idxmax()
|
||||
best_k = results.loc[best_idx, 'k_min']
|
||||
best_eff = results.loc[best_idx, 'effectiveness']
|
||||
|
||||
# 1. 有效性均值
|
||||
ax = axes[0, 0]
|
||||
ax.plot(results['k_min'], results['effectiveness'], 'b-', lw=2)
|
||||
ax.axvline(best_k, color='r', ls='--', label=f'Best k_min={best_k:.1f}')
|
||||
ax.scatter([best_k], [best_eff], c='r', s=100, zorder=5)
|
||||
ax.set_xlabel('k_min')
|
||||
ax.set_ylabel('Mean Effectiveness')
|
||||
ax.set_title('Mean Effectiveness vs k_min')
|
||||
ax.legend()
|
||||
ax.grid(True, alpha=0.3)
|
||||
|
||||
# 2. 最低10%有效性均值
|
||||
ax = axes[0, 1]
|
||||
ax.plot(results['k_min'], results['bottom10_eff'], 'm-', lw=2)
|
||||
ax.axvline(best_k, color='r', ls='--')
|
||||
ax.set_xlabel('k_min')
|
||||
ax.set_ylabel('Bottom 10% Mean Effectiveness')
|
||||
ax.set_title('Bottom 10% Mean Effectiveness vs k_min')
|
||||
ax.grid(True, alpha=0.3)
|
||||
|
||||
# 3. 总服务客户数
|
||||
ax = axes[1, 0]
|
||||
ax.plot(results['k_min'], results['total_served'] / 1000, 'c-', lw=2)
|
||||
ax.axhline(results['total_demand'].iloc[0] / 1000, color='gray', ls=':', label='Total Demand')
|
||||
ax.axvline(best_k, color='r', ls='--')
|
||||
ax.set_xlabel('k_min')
|
||||
ax.set_ylabel('Served Families (×1000)')
|
||||
ax.set_title('Total Served vs k_min')
|
||||
ax.legend()
|
||||
ax.grid(True, alpha=0.3)
|
||||
|
||||
# 4. 最小有效性
|
||||
ax = axes[1, 1]
|
||||
ax.plot(results['k_min'], results['min_eff'], 'g-', lw=2)
|
||||
ax.axvline(best_k, color='r', ls='--')
|
||||
ax.set_xlabel('k_min')
|
||||
ax.set_ylabel('Min Effectiveness')
|
||||
ax.set_title('Worst Site Effectiveness vs k_min')
|
||||
ax.grid(True, alpha=0.3)
|
||||
|
||||
# 5. 未满足需求 vs 浪费
|
||||
ax = axes[2, 0]
|
||||
ax.plot(results['k_min'], results['unmet'] / 1000, 'r-', lw=2, label='Unmet')
|
||||
ax.plot(results['k_min'], results['waste'] / 1000, 'b-', lw=2, label='Waste')
|
||||
ax.axvline(best_k, color='gray', ls='--')
|
||||
ax.set_xlabel('k_min')
|
||||
ax.set_ylabel('Families (×1000)')
|
||||
ax.set_title('Unmet Demand vs Wasted Capacity')
|
||||
ax.legend()
|
||||
ax.grid(True, alpha=0.3)
|
||||
|
||||
# 6. 有效性标准差
|
||||
ax = axes[2, 1]
|
||||
ax.plot(results['k_min'], results['std_eff'], color='tab:orange', lw=2)
|
||||
ax.axvline(best_k, color='gray', ls='--')
|
||||
ax.set_xlabel('k_min')
|
||||
ax.set_ylabel('Std Effectiveness')
|
||||
ax.set_title('Effectiveness Std vs k_min')
|
||||
ax.grid(True, alpha=0.3)
|
||||
|
||||
plt.tight_layout()
|
||||
plt.savefig('kmin_effectiveness.png', dpi=150)
|
||||
plt.close(fig)
|
||||
|
||||
return best_k, best_eff
|
||||
|
||||
|
||||
def main():
|
||||
# 1. 加载数据
|
||||
df = load_data()
|
||||
print(f"站点数: {len(df)}, 总运力: {N_TOTAL}")
|
||||
print(f"总需求: {df['TotalDemand'].sum():,.0f} 家庭次")
|
||||
|
||||
# 2. 扫描k_min
|
||||
print("\n扫描 k_min ∈ [1.0, 10.0]...")
|
||||
results = analyze_kmin_range(df)
|
||||
|
||||
# 3. 绘图
|
||||
best_idx = results['effectiveness'].idxmax()
|
||||
best_k = results.loc[best_idx, 'k_min']
|
||||
best_eff = results.loc[best_idx, 'effectiveness']
|
||||
if _HAS_MPL:
|
||||
plot_results(results)
|
||||
else:
|
||||
print("\n未检测到 matplotlib,跳过绘图(仍会保存CSV结果)。")
|
||||
|
||||
# 4. 输出最优结果
|
||||
print(f"\n最优 k_min = {best_k:.1f}")
|
||||
print(f"最优有效性 = {best_eff:.4f}")
|
||||
|
||||
# 5. 生成最优方案
|
||||
df_opt = allocate_visits(df, best_k, N_TOTAL, C_OPT)
|
||||
metrics = calc_effectiveness(df_opt)
|
||||
|
||||
print(f"\n最优方案统计:")
|
||||
print(f" 有效性: {metrics['mean']:.4f} (min={metrics['min']:.4f})")
|
||||
print(f" 总服务: {metrics['total_served']:,.0f} / {metrics['total_demand']:,.0f} ({metrics['serve_ratio']:.1%})")
|
||||
print(f" 未满足: {metrics['total_unmet']:,.0f} 家庭次")
|
||||
print(f" 浪费: {metrics['total_waste']:,.0f} 家庭次")
|
||||
print(f" 访问次数: [{df_opt['AllocatedVisits'].min()}, {df_opt['AllocatedVisits'].max()}]")
|
||||
|
||||
# 6. 保存
|
||||
results.to_csv('kmin_effectiveness_data.csv', index=False)
|
||||
print("\n已保存: kmin_effectiveness.png, kmin_effectiveness_data.csv")
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
Reference in New Issue
Block a user