modify: cleanup project structure and docs
This commit is contained in:
139
services/forecast.py
Normal file
139
services/forecast.py
Normal file
@@ -0,0 +1,139 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import warnings
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import streamlit as st
|
||||
import statsmodels.api as sm
|
||||
from statsmodels.tsa.arima.model import ARIMA
|
||||
from statsmodels.tools.sm_exceptions import ValueWarning
|
||||
from sklearn.neighbors import KNeighborsRegressor
|
||||
from sklearn.svm import SVR
|
||||
from sklearn.pipeline import make_pipeline
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
|
||||
from config.settings import ARIMA_P, ARIMA_D, ARIMA_Q, MAX_PRE_DAYS
|
||||
|
||||
|
||||
@st.cache_data(show_spinner=False)
def evaluate_arima_model(series, arima_order):
    """Fit an ARIMA(p, d, q) model on *series* and return its AIC.

    A failed fit yields ``float("inf")`` so grid searches can rank an
    un-fittable order as the worst possible candidate.
    """
    try:
        fitted = ARIMA(series, order=arima_order).fit()
        return fitted.aic
    except Exception:
        return float("inf")
|
||||
|
||||
|
||||
@st.cache_data(show_spinner=False)
def arima_forecast_with_grid_search(accident_series: pd.Series,
                                    start_date: pd.Timestamp,
                                    horizon: int = 30,
                                    p_values: list = tuple(ARIMA_P),
                                    d_values: list = tuple(ARIMA_D),
                                    q_values: list = tuple(ARIMA_Q)) -> pd.DataFrame:
    """Grid-search ARIMA orders by AIC and forecast ``horizon`` days.

    Parameters
    ----------
    accident_series : daily accident counts; gaps are filled with 0.
    start_date : first date of the returned forecast index.
    horizon : number of days to forecast.
    p_values / d_values / q_values : candidate (p, d, q) components.

    Returns
    -------
    DataFrame indexed by date with the model's ``summary_frame`` columns;
    the ``mean`` column is renamed to ``forecast``.
    """
    series = accident_series.asfreq('D').fillna(0)
    start_date = pd.to_datetime(start_date)

    warnings.filterwarnings("ignore", category=ValueWarning)
    best_score, best_cfg = float("inf"), None
    for p in p_values:
        for d in d_values:
            for q in q_values:
                order = (p, d, q)
                try:
                    aic = evaluate_arima_model(series, order)
                    if aic < best_score:
                        best_score, best_cfg = aic, order
                except Exception:
                    continue

    # BUGFIX: if every candidate order failed to fit, best_cfg stayed None
    # and ARIMA(series, order=None) raised.  Fall back to ARIMA(1, 0, 1).
    if best_cfg is None:
        best_cfg = (1, 0, 1)

    model = ARIMA(series, order=best_cfg)
    fit = model.fit()
    forecast_index = pd.date_range(start=start_date, periods=horizon, freq='D')
    res = fit.get_forecast(steps=horizon)
    df = res.summary_frame()
    df.index = forecast_index
    df.index.name = 'date'
    df.rename(columns={'mean': 'forecast'}, inplace=True)
    return df
|
||||
|
||||
|
||||
def knn_forecast_counterfactual(accident_series: pd.Series,
                                intervention_date: pd.Timestamp,
                                lookback: int = 14,
                                horizon: int = 30):
    """Recursive KNN forecast of the counterfactual (no-intervention) series.

    Trains on lagged daily counts strictly before ``intervention_date`` and
    rolls the prediction forward one day at a time for ``horizon`` days.
    Returns ``(predictions, None)``, or ``(None, None)`` when there is not
    enough pre-intervention history.
    """
    daily = accident_series.asfreq('D').fillna(0)
    intervention_date = pd.to_datetime(intervention_date).normalize()

    frame = pd.DataFrame({'y': daily})
    for lag in range(1, lookback + 1):
        frame[f'lag_{lag}'] = frame['y'].shift(lag)

    cutoff = intervention_date - pd.Timedelta(days=1)
    train = frame.loc[:cutoff].dropna()
    if len(train) < 5:
        return None, None
    model = KNeighborsRegressor(n_neighbors=5)
    model.fit(train.filter(like='lag_').values, train['y'].values)

    history = frame.loc[:cutoff, 'y'].tolist()
    predictions = []
    for _ in range(horizon):
        if len(history) < lookback:
            return None, None
        # Most recent day first, matching the lag_1..lag_n feature layout.
        features = np.array(history[-lookback:][::-1]).reshape(1, -1)
        next_value = model.predict(features)[0]
        predictions.append(next_value)
        history.append(next_value)

    pred_index = pd.date_range(intervention_date, periods=horizon, freq='D')
    return pd.Series(predictions, index=pred_index, name='knn_pred'), None
|
||||
|
||||
|
||||
def fit_and_extrapolate(series: pd.Series,
                        intervention_date: pd.Timestamp,
                        days: int = 30,
                        max_pre_days: int = MAX_PRE_DAYS):
    """Fit Poisson-GLM and SVR trend models on the pre-intervention window
    and extrapolate both for ``days`` days from ``intervention_date``.

    Returns ``(glm_pred, svr_pred, residuals)`` where each prediction is a
    daily ``pd.Series`` (or ``None`` if that model's fit failed) and
    ``residuals`` is ``actual - svr_pred`` over the post window (``None``
    when SVR failed).  Returns ``(None, None, None)`` when fewer than 3
    pre-intervention days are available.
    """
    series = series.asfreq('D').fillna(0)
    # Normalise both sides to tz-naive midnight timestamps so label slicing
    # lines up exactly.
    series.index = pd.to_datetime(series.index).tz_localize(None).normalize()
    intervention_date = pd.to_datetime(intervention_date).tz_localize(None).normalize()

    pre = series.loc[:intervention_date - pd.Timedelta(days=1)]
    if len(pre) > max_pre_days:
        # Cap the training window so very old history does not dominate.
        pre = pre.iloc[-max_pre_days:]
    if len(pre) < 3:
        return None, None, None

    # Integer time axis: 0..n-1 for fitting, n..n+days-1 for the forecast.
    x_pre = np.arange(len(pre))
    x_future = np.arange(len(pre), len(pre) + days)

    try:
        # Quadratic-trend Poisson GLM (appropriate for non-negative counts).
        X_pre_glm = sm.add_constant(np.column_stack([x_pre, x_pre**2]))
        glm = sm.GLM(pre.values, X_pre_glm, family=sm.families.Poisson())
        glm_res = glm.fit()
        X_future_glm = sm.add_constant(np.column_stack([x_future, x_future**2]))
        glm_pred = glm_res.predict(X_future_glm)
    except Exception:
        glm_pred = None

    try:
        # RBF support-vector regression on the same scaled time axis.
        svr = make_pipeline(StandardScaler(), SVR(kernel='rbf', C=10, gamma=0.1))
        svr.fit(x_pre.reshape(-1, 1), pre.values)
        svr_pred = svr.predict(x_future.reshape(-1, 1))
    except Exception:
        svr_pred = None

    post_index = pd.date_range(intervention_date, periods=days, freq='D')

    glm_pred = pd.Series(glm_pred, index=post_index, name='glm_pred') if glm_pred is not None else None
    svr_pred = pd.Series(svr_pred, index=post_index, name='svr_pred') if svr_pred is not None else None

    post = series.reindex(post_index)
    residuals = None
    if svr_pred is not None:
        # Negative residuals mean fewer accidents than the model expected.
        residuals = pd.Series(post.values - svr_pred[:len(post)], index=post_index, name='residual')

    return glm_pred, svr_pred, residuals
|
||||
|
||||
227
services/hotspot.py
Normal file
227
services/hotspot.py
Normal file
@@ -0,0 +1,227 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Iterable
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
LOCATION_KEYWORDS: tuple[str, ...] = (
|
||||
"路",
|
||||
"道",
|
||||
"街",
|
||||
"巷",
|
||||
"路口",
|
||||
"交叉口",
|
||||
"大道",
|
||||
"公路",
|
||||
"口",
|
||||
)
|
||||
AREA_KEYWORDS: tuple[str, ...] = (
|
||||
"新城",
|
||||
"临城",
|
||||
"千岛",
|
||||
"翁山",
|
||||
"海天",
|
||||
"海宇",
|
||||
"定沈",
|
||||
"滨海",
|
||||
"港岛",
|
||||
"体育",
|
||||
"长升",
|
||||
"金岛",
|
||||
"桃湾",
|
||||
)
|
||||
|
||||
LOCATION_MAPPING: dict[str, str] = {
|
||||
"新城千岛路": "千岛路",
|
||||
"千岛路海天大道": "千岛路海天大道口",
|
||||
"海天大道千岛路": "千岛路海天大道口",
|
||||
"新城翁山路": "翁山路",
|
||||
"翁山路金岛路": "翁山路金岛路口",
|
||||
"海天大道临长路": "海天大道临长路口",
|
||||
"定沈路卫生医院门口": "定沈路医院段",
|
||||
"翁山路海城路西口": "翁山路海城路口",
|
||||
"海宇道路口": "海宇道",
|
||||
"海天大道路口": "海天大道",
|
||||
"定沈路交叉路口": "定沈路",
|
||||
"千岛路路口": "千岛路",
|
||||
"体育路路口": "体育路",
|
||||
"金岛路路口": "金岛路",
|
||||
}
|
||||
|
||||
SEVERITY_MAP: dict[str, int] = {"财损": 1, "伤人": 2, "亡人": 4}
|
||||
|
||||
|
||||
def _extract_road_info(location: str | float | None) -> str:
    """Pull a road/area name out of a free-text accident location.

    Returns the first whitespace/comma-separated token containing a known
    road or area keyword, the full text when a keyword matches but no token
    does, and the text truncated to 20 characters otherwise.  Missing
    values map to ``"未知路段"``.
    """
    if pd.isna(location):
        return "未知路段"
    text = str(location)
    for keyword in LOCATION_KEYWORDS + AREA_KEYWORDS:
        if keyword not in text:
            continue
        tokens = text.replace(",", " ").replace(",", " ").split()
        for token in tokens:
            if keyword in token:
                return token
        return text
    return text[:20] if len(text) > 20 else text
|
||||
|
||||
|
||||
def prepare_hotspot_dataset(accident_records: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of the accident records ready for hotspot analysis.

    Ensures the required text columns exist (filling gaps with defaults),
    derives an integer ``severity`` score when absent, drops rows without a
    parseable accident time, and adds a ``standardized_location`` column.
    """
    prepared = accident_records.copy()
    defaults: dict[str, str] = {
        "道路类型": "未知道路类型",
        "路口路段类型": "未知路段",
        "事故具体地点": "未知路段",
        "事故类型": "财损",
        "所在街道": "未知街道",
    }
    for name, fallback in defaults.items():
        if name in prepared.columns:
            prepared[name] = prepared[name].fillna(fallback)
        else:
            prepared[name] = fallback

    if "severity" not in prepared.columns:
        prepared["severity"] = prepared["事故类型"].map(SEVERITY_MAP).fillna(1).astype(int)

    prepared["事故时间"] = pd.to_datetime(prepared["事故时间"], errors="coerce")
    prepared = prepared.dropna(subset=["事故时间"]).sort_values("事故时间").reset_index(drop=True)
    prepared["standardized_location"] = (
        prepared["事故具体地点"].apply(_extract_road_info).replace(LOCATION_MAPPING)
    )
    return prepared
|
||||
|
||||
|
||||
def analyze_hotspot_frequency(df: pd.DataFrame, time_window: str = "7D") -> pd.DataFrame:
    """Aggregate accidents per standardized location, overall and recent.

    ``time_window`` (a pandas offset string) defines the "recent" period
    ending at the newest accident timestamp.  The result is sorted by
    recent count, then total count, both descending.
    """
    latest = df["事故时间"].max()
    recent_cutoff = latest - pd.Timedelta(time_window)

    overall_stats = df.groupby("standardized_location").agg(
        accident_count=("事故时间", "count"),
        last_accident=("事故时间", "max"),
        main_accident_type=("事故类型", _mode_fallback),
        main_road_type=("道路类型", _mode_fallback),
        main_intersection_type=("路口路段类型", _mode_fallback),
        total_severity=("severity", "sum"),
    )

    recent_stats = (
        df[df["事故时间"] >= recent_cutoff]
        .groupby("standardized_location")
        .agg(
            recent_count=("事故时间", "count"),
            recent_accident_type=("事故类型", _mode_fallback),
            recent_severity=("severity", "sum"),
        )
    )

    merged = overall_stats.merge(recent_stats, left_index=True, right_index=True, how="left")
    # Locations with no recent accidents get zero counts; any remaining NaN
    # (e.g. recent_accident_type) becomes an empty string.
    merged = merged.fillna({"recent_count": 0, "recent_severity": 0}).fillna("")
    merged["recent_count"] = merged["recent_count"].astype(int)
    merged["trend_ratio"] = merged["recent_count"] / merged["accident_count"]
    merged["days_since_last"] = (latest - merged["last_accident"]).dt.days.astype(int)
    merged["avg_severity"] = merged["total_severity"] / merged["accident_count"]
    return merged.sort_values(["recent_count", "accident_count"], ascending=False)
|
||||
|
||||
|
||||
def calculate_hotspot_risk_score(hotspot_df: pd.DataFrame) -> pd.DataFrame:
    """Score each hotspot 0-100 and bucket it into a risk level.

    Component scores: frequency (0-40, relative to the busiest location),
    trend (0-30), severity (5/15/20 by dominant accident type) and urgency
    (0-10, decaying over 30 days since the last accident).  The result is
    sorted by ``risk_score`` descending; an empty frame is returned as-is.
    """
    scored = hotspot_df.copy()
    if scored.empty:
        return scored

    peak = scored["accident_count"].max()
    scored["frequency_score"] = (scored["accident_count"] / peak * 40).clip(0, 40)
    scored["trend_score"] = (scored["trend_ratio"] * 30).clip(0, 30)
    severity_points = {"财损": 5, "伤人": 15, "亡人": 20}
    scored["severity_score"] = scored["main_accident_type"].map(severity_points).fillna(5)
    scored["urgency_score"] = ((30 - scored["days_since_last"]) / 30 * 10).clip(0, 10)
    scored["risk_score"] = (
        scored["frequency_score"]
        + scored["trend_score"]
        + scored["severity_score"]
        + scored["urgency_score"]
    )
    thresholds = [
        scored["risk_score"] >= 70,
        scored["risk_score"] >= 50,
        scored["risk_score"] >= 30,
    ]
    labels = ["高风险", "中风险", "低风险"]
    scored["risk_level"] = np.select(thresholds, labels, default="一般风险")
    return scored.sort_values("risk_score", ascending=False)
|
||||
|
||||
|
||||
def generate_hotspot_strategies(
|
||||
hotspot_df: pd.DataFrame, time_period: str = "本周"
|
||||
) -> list[dict[str, str | float]]:
|
||||
strategies: list[dict[str, str | float]] = []
|
||||
for location_name, location_data in hotspot_df.iterrows():
|
||||
accident_count = float(location_data["accident_count"])
|
||||
recent_count = float(location_data.get("recent_count", 0))
|
||||
accident_type = str(location_data.get("main_accident_type", "财损"))
|
||||
intersection_type = str(location_data.get("main_intersection_type", "普通路段"))
|
||||
trend_ratio = float(location_data.get("trend_ratio", 0))
|
||||
risk_level = str(location_data.get("risk_level", "一般风险"))
|
||||
|
||||
base_info = f"{time_period}对【{location_name}】"
|
||||
data_support = (
|
||||
f"(近期{int(recent_count)}起,累计{int(accident_count)}起,{accident_type}为主)"
|
||||
)
|
||||
|
||||
strategy_parts: list[str] = []
|
||||
if "信号灯" in intersection_type:
|
||||
if accident_type == "财损":
|
||||
strategy_parts.extend(["加强闯红灯查处", "优化信号配时", "整治不按规定让行"])
|
||||
else:
|
||||
strategy_parts.extend(["完善人行过街设施", "加强非机动车管理", "设置警示标志"])
|
||||
elif "普通路段" in intersection_type:
|
||||
strategy_parts.extend(["加强巡逻管控", "整治违法停车", "设置限速标志"])
|
||||
else:
|
||||
strategy_parts.extend(["分析事故成因", "制定综合整治方案"])
|
||||
|
||||
if risk_level == "高风险":
|
||||
strategy_parts.extend(["列为重点整治路段", "开展专项整治行动"])
|
||||
elif risk_level == "中风险":
|
||||
strategy_parts.append("加强日常监管")
|
||||
|
||||
if trend_ratio > 0.4:
|
||||
strategy_parts.append("近期重点监控")
|
||||
|
||||
strategy_text = (
|
||||
base_info + "," + ",".join(strategy_parts) + data_support
|
||||
if strategy_parts
|
||||
else base_info + "加强交通安全管理" + data_support
|
||||
)
|
||||
|
||||
strategies.append(
|
||||
{
|
||||
"location": location_name,
|
||||
"strategy": strategy_text,
|
||||
"risk_level": risk_level,
|
||||
"accident_count": accident_count,
|
||||
"recent_count": recent_count,
|
||||
}
|
||||
)
|
||||
return strategies
|
||||
|
||||
|
||||
def serialise_datetime_columns(df: pd.DataFrame, columns: Iterable[str]) -> pd.DataFrame:
    """Return a copy with the named datetime columns rendered as strings.

    Columns that are absent or not datetime-typed are left untouched; the
    output format is ``YYYY-MM-DD HH:MM:SS``.
    """
    out = df.copy()
    present = (
        name for name in columns
        if name in out.columns and pd.api.types.is_datetime64_any_dtype(out[name])
    )
    for name in present:
        out[name] = out[name].dt.strftime("%Y-%m-%d %H:%M:%S")
    return out
|
||||
|
||||
|
||||
def _mode_fallback(series: pd.Series) -> str:
    """Most frequent value of *series*, as a string.

    Empty input yields ``""``; when no mode exists, the first element is
    used instead.
    """
    if series.empty:
        return ""
    modes = series.mode()
    if modes.empty:
        return str(series.iloc[0])
    return str(modes.iloc[0])
|
||||
|
||||
270
services/io.py
Normal file
270
services/io.py
Normal file
@@ -0,0 +1,270 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import Iterable, Mapping
|
||||
|
||||
import pandas as pd
|
||||
import streamlit as st
|
||||
|
||||
COLUMN_ALIASES: Mapping[str, str] = {
|
||||
'事故发生时间': '事故时间',
|
||||
'发生时间': '事故时间',
|
||||
'时间': '事故时间',
|
||||
'街道': '所在街道',
|
||||
'所属街道': '所在街道',
|
||||
'所属辖区': '所在区县',
|
||||
'辖区街道': '所在街道',
|
||||
'事故发生地点': '事故地点',
|
||||
'事故地址': '事故地点',
|
||||
'事故位置': '事故地点',
|
||||
'事故具体地址': '事故具体地点',
|
||||
'案件类型': '事故类型',
|
||||
'事故类别': '事故类型',
|
||||
'事故性质': '事故类型',
|
||||
'事故类型1': '事故类型',
|
||||
}
|
||||
|
||||
ACCIDENT_TYPE_NORMALIZATION: Mapping[str, str] = {
|
||||
'财产损失': '财损',
|
||||
'财产损失事故': '财损',
|
||||
'一般程序': '伤人',
|
||||
'一般程序事故': '伤人',
|
||||
'伤人事故': '伤人',
|
||||
'造成人员受伤': '伤人',
|
||||
'造成人员死亡': '亡人',
|
||||
'死亡事故': '亡人',
|
||||
'亡人事故': '亡人',
|
||||
'亡人死亡': '亡人',
|
||||
'号': '财损',
|
||||
}
|
||||
|
||||
REGION_FROM_LOCATION_PATTERN = re.compile(r'([一-龥]{2,8}(街道|新区|开发区|镇|区))')
|
||||
|
||||
REGION_NORMALIZATION: Mapping[str, str] = {
|
||||
'临城中队': '临城街道',
|
||||
'临城新区': '临城街道',
|
||||
'临城': '临城街道',
|
||||
'新城': '临城街道',
|
||||
'千岛中队': '千岛街道',
|
||||
'千岛新区': '千岛街道',
|
||||
'千岛': '千岛街道',
|
||||
'沈家门中队': '沈家门街道',
|
||||
'沈家门': '沈家门街道',
|
||||
'普陀城区': '沈家门街道',
|
||||
'普陀': '沈家门街道',
|
||||
}
|
||||
|
||||
|
||||
def _clean_text(series: pd.Series) -> pd.Series:
    """Strip whitespace and turn obvious null placeholders into NA."""
    stripped = series.astype(str).str.strip()
    placeholders = {'', 'nan', 'NaN', 'None', 'NULL', '<NA>', '无', '—'}
    return stripped.mask(stripped.isin(placeholders))
|
||||
|
||||
|
||||
def _maybe_seek_start(file_obj) -> None:
    """Rewind *file_obj* to position 0 when it supports seeking.

    Errors are swallowed so non file-like objects pass through untouched.
    """
    seek = getattr(file_obj, "seek", None)
    if seek is None:
        return
    try:
        seek(0)
    except Exception:  # pragma: no cover - guard against non file-likes
        pass
|
||||
|
||||
|
||||
def _prepare_sheet(df: pd.DataFrame) -> pd.DataFrame:
    """Standardise a single sheet from the accident-data workbook.

    Strips column names, relocates the header row when it is embedded in
    the data (legacy multi-sheet exports), and applies ``COLUMN_ALIASES``.
    Returns an empty frame for ``None``/empty input.
    """
    if df is None or df.empty:
        return pd.DataFrame()

    sheet = df.copy()
    # Normalise column names first
    sheet.columns = [str(col).strip() for col in sheet.columns]
    # If the expected time column is still not recognised, attempt to locate
    # the real header row inside the data and promote it.
    if '事故时间' not in sheet.columns and '事故发生时间' not in sheet.columns:
        header_row = None
        for idx, row in sheet.iterrows():
            values = [str(cell).strip() for cell in row.tolist()]
            if '事故时间' in values or '事故发生时间' in values or '报警时间' in values:
                header_row = idx
                break
        if header_row is not None:
            # Promote the located row to header and drop everything above it.
            sheet.columns = [str(x).strip() for x in sheet.iloc[header_row].tolist()]
            sheet = sheet.iloc[header_row + 1 :].reset_index(drop=True)
            sheet.columns = [str(col).strip() for col in sheet.columns]

    # Apply aliases after potential header relocation
    sheet = sheet.rename(columns={src: dst for src, dst in COLUMN_ALIASES.items() if src in sheet.columns})

    return sheet
|
||||
|
||||
|
||||
def _coalesce_columns(df: pd.DataFrame, columns: Iterable[str]) -> pd.Series:
    """First non-null cleaned value across *columns*, row by row.

    Columns are consulted in the given order; missing columns are skipped.
    """
    combined = pd.Series(pd.NA, index=df.index, dtype="object")
    for name in columns:
        if name not in df.columns:
            continue
        combined = combined.fillna(_clean_text(df[name]))
    return combined
|
||||
|
||||
|
||||
def _infer_region_from_location(location: str) -> str | None:
    """Extract an administrative-area name (街道/新区/…) from a location.

    Returns ``None`` for missing/blank input or when no area pattern
    matches.
    """
    if pd.isna(location):
        return None
    text = str(location).strip()
    if not text:
        return None
    match = REGION_FROM_LOCATION_PATTERN.search(text)
    return match.group(1) if match else None
|
||||
|
||||
|
||||
def _normalise_region_series(series: pd.Series) -> pd.Series:
    """Map raw region labels onto canonical street names, leaving NA as-is."""
    def _canonical(value):
        if pd.isna(value):
            return value
        return REGION_NORMALIZATION.get(value, value)

    return series.map(_canonical)
|
||||
|
||||
|
||||
def load_accident_records(accident_file, *, require_location: bool = False) -> pd.DataFrame:
    """
    Load accident records from the updated Excel template.

    The function supports workbooks with a single sheet (e.g. sample/事故处理/事故2021-2022.xlsx)
    as well as legacy multi-sheet formats where the header row might sit within the data.

    Parameters
    ----------
    accident_file : path or file-like object accepted by ``pd.read_excel``.
    require_location : when True, a usable location column becomes mandatory
        and rows without one are dropped.

    Returns
    -------
    DataFrame sorted by 事故时间 with normalised 所在街道 / 事故类型 /
    事故具体地点 columns and an integer ``severity`` score per row.

    Raises
    ------
    ValueError : when no valid records or required columns are found.
    """
    _maybe_seek_start(accident_file)
    # sheet_name=None reads every sheet; each is standardised independently.
    sheets = pd.read_excel(accident_file, sheet_name=None)
    if isinstance(sheets, dict):
        frames = [frame for frame in ( _prepare_sheet(df) for df in sheets.values() ) if not frame.empty]
    else:  # pragma: no cover - pandas only returns dict when sheet_name=None, but keep guard
        frames = [_prepare_sheet(sheets)]

    if not frames:
        raise ValueError("未在上传的事故数据中检测到有效的事故记录,请确认文件内容。")

    accident_df = pd.concat(frames, ignore_index=True)

    # Normalise columns of interest: fall back to the alarm time when the
    # accident-time column is absent.
    if '事故时间' not in accident_df.columns and '报警时间' in accident_df.columns:
        accident_df['事故时间'] = accident_df['报警时间']

    if '事故时间' not in accident_df.columns:
        raise ValueError("事故数据缺少“事故时间”字段,请确认模板是否为最新版本。")

    accident_df['事故时间'] = pd.to_datetime(accident_df['事故时间'], errors='coerce')

    # Location harmonisation (used for both region inference and hotspot analysis)
    location_columns_available = [col for col in ['事故具体地点', '事故地点'] if col in accident_df.columns]
    location_series = _coalesce_columns(accident_df, ['事故具体地点', '事故地点'])

    # Region handling
    region = _coalesce_columns(accident_df, ['所在街道', '所属街道', '所在区县', '辖区中队'])
    # Infer region from location fields when still missing; as a last resort
    # use the cleaned location text itself.
    if region.isna().any():
        inferred = location_series.map(_infer_region_from_location)
        region = region.fillna(inferred)
        region = region.fillna(_clean_text(location_series))

    region_clean = _clean_text(region)
    accident_df['所在街道'] = _normalise_region_series(region_clean)

    # Accident type normalisation.
    accident_type = _coalesce_columns(accident_df, ['事故类型', '事故类别', '事故性质'])
    accident_type = accident_type.replace(ACCIDENT_TYPE_NORMALIZATION)
    # NOTE(review): replace() runs again after _clean_text so values that only
    # match a normalisation key once stripped are still mapped.
    accident_type = _clean_text(accident_type).replace(ACCIDENT_TYPE_NORMALIZATION)
    accident_df['事故类型'] = accident_type.fillna('财损')

    # Location column harmonisation
    if require_location and not location_columns_available and location_series.isna().all():
        raise ValueError("事故数据缺少“事故具体地点”字段,请确认模板是否与 sample/事故处理 中示例一致。")
    accident_df['事故具体地点'] = _clean_text(location_series)

    # Drop records with missing core fields
    subset = ['事故时间', '所在街道', '事故类型']
    if require_location:
        subset.append('事故具体地点')
    accident_df = accident_df.dropna(subset=subset)

    # Severity score: 财损=1, 伤人=2, 亡人=4; unknown types default to 1.
    severity_map = {'财损': 1, '伤人': 2, '亡人': 4}
    accident_df['severity'] = accident_df['事故类型'].map(severity_map).fillna(1).astype(int)

    accident_df = accident_df.sort_values('事故时间').reset_index(drop=True)

    return accident_df
|
||||
|
||||
|
||||
@st.cache_data(show_spinner=False)
def load_and_clean_data(accident_file, strategy_file):
    """Load accident and strategy workbooks into tidy frames.

    Returns ``(accident_data, strategy_df)``: the accident frame carries
    ``date_time`` / ``region`` / ``category`` / ``severity`` columns, the
    strategy frame ``date_time`` / ``strategy_type``.

    Raises ``ValueError`` when the strategy workbook lacks a required
    column.
    """
    records = load_accident_records(accident_file)
    accident_data = records.rename(
        columns={'事故时间': 'date_time', '所在街道': 'region', '事故类型': 'category'}
    )

    _maybe_seek_start(strategy_file)
    strategies = pd.read_excel(strategy_file)
    strategies = strategies.rename(columns=lambda col: str(col).strip())
    if '发布时间' not in strategies.columns:
        raise ValueError("策略数据缺少“发布时间”字段,请确认文件格式。")

    strategies['发布时间'] = pd.to_datetime(strategies['发布时间'], errors='coerce')
    if '交通策略类型' not in strategies.columns:
        raise ValueError("策略数据缺少“交通策略类型”字段,请确认文件格式。")

    strategies['交通策略类型'] = _clean_text(strategies['交通策略类型'])
    strategies = strategies.dropna(subset=['发布时间', '交通策略类型'])

    accident_data = accident_data[['date_time', 'region', 'category', 'severity']]
    strategies = strategies[['发布时间', '交通策略类型']].rename(
        columns={'发布时间': 'date_time', '交通策略类型': 'strategy_type'}
    )

    return accident_data, strategies
|
||||
|
||||
|
||||
@st.cache_data(show_spinner=False)
def aggregate_daily_data(accident_data: pd.DataFrame, strategy_data: pd.DataFrame) -> pd.DataFrame:
    """Combine accidents and strategies into one continuous daily frame.

    The result has one row per calendar day with ``accident_count`` and
    ``severity`` (zero-filled for quiet days) plus ``strategy_type``, the
    list of strategies published that day (empty list when none).
    """
    def _as_list(value):
        return value if isinstance(value, list) else []

    accidents = accident_data.copy()
    strategies = strategy_data.copy()

    accidents['date'] = accidents['date_time'].dt.date
    daily_accidents = accidents.groupby('date').agg(
        accident_count=('date_time', 'count'),
        severity=('severity', 'sum')
    )
    daily_accidents.index = pd.to_datetime(daily_accidents.index)

    strategies['date'] = strategies['date_time'].dt.date
    daily_strategies = strategies.groupby('date')['strategy_type'].apply(list)
    daily_strategies.index = pd.to_datetime(daily_strategies.index)

    combined = daily_accidents.join(daily_strategies, how='left')
    combined['strategy_type'] = combined['strategy_type'].apply(_as_list)
    # asfreq('D') inserts NaN rows for days with no data; re-fill afterwards.
    combined = combined.asfreq('D')
    combined[['accident_count', 'severity']] = combined[['accident_count', 'severity']].fillna(0)
    combined['strategy_type'] = combined['strategy_type'].apply(_as_list)
    return combined
|
||||
|
||||
|
||||
@st.cache_data(show_spinner=False)
def aggregate_daily_data_by_region(accident_data: pd.DataFrame, strategy_data: pd.DataFrame) -> pd.DataFrame:
    """Daily accident aggregates per region, with shared strategy lists.

    Returns a frame indexed by (region, date) covering every region for
    every day in the observed range; days without accidents are
    zero-filled.  ``strategy_type`` holds the list of strategies published
    that day — strategies carry no region, so all regions share the same
    list for a given date.
    """
    df = accident_data.copy()
    df['date'] = df['date_time'].dt.date
    g = df.groupby(['region', 'date']).agg(
        accident_count=('date_time', 'count'),
        severity=('severity', 'sum')
    )
    # Convert the date level from python dates to Timestamps so the reindex
    # against pd.date_range below matches.
    g.index = g.index.set_levels([g.index.levels[0], pd.to_datetime(g.index.levels[1])])
    g = g.sort_index()

    s = strategy_data.copy()
    s['date'] = s['date_time'].dt.date
    daily_strategies = s.groupby('date')['strategy_type'].apply(list)
    daily_strategies.index = pd.to_datetime(daily_strategies.index)

    # Build the dense (region x day) grid over the full observed date span.
    regions = g.index.get_level_values(0).unique()
    dates = pd.date_range(g.index.get_level_values(1).min(), g.index.get_level_values(1).max(), freq='D')
    full_index = pd.MultiIndex.from_product([regions, dates], names=['region', 'date'])
    g = g.reindex(full_index).fillna(0)

    # Attach that day's strategy list (empty list when none was published).
    strat_map = daily_strategies.to_dict()
    g = g.assign(strategy_type=[strat_map.get(d, []) for d in g.index.get_level_values('date')])
    return g
|
||||
86
services/metrics.py
Normal file
86
services/metrics.py
Normal file
@@ -0,0 +1,86 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from sklearn.metrics import mean_absolute_error, mean_squared_error
|
||||
from statsmodels.tsa.arima.model import ARIMA
|
||||
import streamlit as st
|
||||
|
||||
|
||||
@st.cache_data(show_spinner=False)
def evaluate_models(series: pd.Series,
                    horizon: int = 30,
                    lookback: int = 14,
                    p_values: range = range(0, 4),
                    d_values: range = range(0, 2),
                    q_values: range = range(0, 4)) -> pd.DataFrame:
    """
    Hold-out evaluation (last ``horizon`` days as the validation set)
    comparing ARIMA / KNN / GLM / SVR on a daily accident-count series.

    Reports MAE, RMSE and MAPE per model, sorted ascending by RMSE.
    Raises ``ValueError`` when the series is too short to hold out
    ``horizon`` days.
    """
    series = series.asfreq('D').fillna(0)
    if len(series) <= horizon + 10:
        raise ValueError("序列太短,无法留出 %d 天进行评估。" % horizon)

    train, test = series.iloc[:-horizon], series.iloc[-horizon:]

    def _to_series_like(pred, a_index):
        # Align predictions (Series or array-like) with the actuals' index.
        if isinstance(pred, pd.Series):
            return pred.reindex(a_index)
        return pd.Series(pred, index=a_index)

    def _metrics(a: pd.Series, p) -> dict:
        p = _to_series_like(p, a.index).astype(float)
        a = a.astype(float)
        mae = mean_absolute_error(a, p)
        try:
            # Older sklearn supports squared=False; newer releases removed it,
            # in which case we take the square root manually.
            rmse = mean_squared_error(a, p, squared=False)
        except TypeError:
            rmse = mean_squared_error(a, p) ** 0.5
        # Zero actuals become NaN so nanmean skips them (avoids div-by-zero).
        mape = np.nanmean(np.abs((a - p) / np.where(a == 0, np.nan, a))) * 100
        return {"MAE": mae, "RMSE": rmse, "MAPE": mape}

    results = {}

    # ARIMA: AIC grid search fitted on the training split only.
    best_aic, best_order = float('inf'), (1, 0, 1)
    for p in p_values:
        for d in d_values:
            for q in q_values:
                try:
                    aic = ARIMA(train, order=(p, d, q)).fit().aic
                    if aic < best_aic:
                        best_aic, best_order = aic, (p, d, q)
                except Exception:
                    continue
    arima_train = train.asfreq('D').fillna(0)
    arima_pred = ARIMA(arima_train, order=best_order).fit().forecast(steps=horizon)
    results['ARIMA'] = _metrics(test, arima_pred)

    # Import local utilities to avoid circular dependencies
    from services.forecast import knn_forecast_counterfactual, fit_and_extrapolate

    try:
        # KNN/GLM/SVR treat the first held-out day as the "intervention"
        # so they train only on the training split internally.
        knn_pred, _ = knn_forecast_counterfactual(series,
                                                  train.index[-1] + pd.Timedelta(days=1),
                                                  lookback=lookback,
                                                  horizon=horizon)
        if knn_pred is not None:
            results['KNN'] = _metrics(test, knn_pred)
    except Exception:
        pass

    try:
        glm_pred, svr_pred, _ = fit_and_extrapolate(series,
                                                    train.index[-1] + pd.Timedelta(days=1),
                                                    days=horizon)
        if glm_pred is not None:
            results['GLM'] = _metrics(test, glm_pred)
        if svr_pred is not None:
            results['SVR'] = _metrics(test, svr_pred)
    except Exception:
        pass

    return (pd.DataFrame(results)
            .T.sort_values('RMSE')
            .round(3))
|
||||
|
||||
157
services/strategy.py
Normal file
157
services/strategy.py
Normal file
@@ -0,0 +1,157 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import pandas as pd
|
||||
import streamlit as st
|
||||
|
||||
from services.forecast import fit_and_extrapolate, arima_forecast_with_grid_search
|
||||
from config.settings import MIN_PRE_DAYS, MAX_PRE_DAYS
|
||||
|
||||
|
||||
def evaluate_strategy_effectiveness(actual_series: pd.Series,
                                    counterfactual_series: pd.Series,
                                    severity_series: pd.Series,
                                    strategy_date: pd.Timestamp,
                                    window: int = 30):
    """Judge a strategy by comparing actuals against the counterfactual.

    Over a ``window``-day post period starting at ``strategy_date``:

    - ``count_effective``: actuals beat the counterfactual on at least
      half of the days;
    - ``severity_effective``: post-window severity total dropped versus
      the preceding window of the same length;
    - ``F1``: relative accident reduction vs the counterfactual;
    - ``F2``: relative severity reduction vs the pre window;
    - ``safety_state``: '一级' (F1 and F2 > 0.5), '二级' (F1 > 0.3),
      else '三级'.

    Returns ``(count_effective, severity_effective, (F1, F2), safety_state)``;
    an empty post window yields ``(False, False, (0.0, 0.0), '三级')``.
    """
    strategy_date = pd.to_datetime(strategy_date)
    window_end = strategy_date + pd.Timedelta(days=window - 1)
    pre_start = strategy_date - pd.Timedelta(days=window)
    pre_end = strategy_date - pd.Timedelta(days=1)

    pre_sev = severity_series.loc[pre_start:pre_end].sum()
    post_sev = severity_series.loc[strategy_date:window_end].sum()

    actual_post = actual_series.loc[strategy_date:window_end]
    counter_post = counterfactual_series.loc[strategy_date:window_end].reindex(actual_post.index)

    n_days = len(actual_post)
    if n_days == 0:
        return False, False, (0.0, 0.0), '三级'

    days_below = (actual_post < counter_post).sum()
    count_effective = days_below >= (n_days / 2)
    severity_effective = post_sev < pre_sev

    cf_total = counter_post.sum()
    F1 = (cf_total - actual_post.sum()) / cf_total if cf_total > 0 else 0.0
    F2 = (pre_sev - post_sev) / pre_sev if pre_sev > 0 else 0.0

    if F1 > 0.5 and F2 > 0.5:
        safety_state = '一级'
    elif F1 > 0.3:
        safety_state = '二级'
    else:
        safety_state = '三级'
    return count_effective, severity_effective, (F1, F2), safety_state
|
||||
|
||||
|
||||
@st.cache_data(show_spinner=False)
def generate_output_and_recommendations(combined_data: pd.DataFrame,
                                        strategy_types: list,
                                        region: str = '全市',
                                        horizon: int = 30):
    """Evaluate each strategy type against a counterfactual forecast and
    recommend the best one.

    For every strategy type present in ``combined_data['strategy_type']``,
    an intervention date with enough pre-history is chosen, a counterfactual
    series is built (SVR preferred, then GLM, then an ARIMA fallback) and
    the strategy is scored via ``evaluate_strategy_effectiveness``.  A
    secondary pass with a fixed 14-day window runs when the first pass
    produced no results at all.

    Returns ``(results, recommendation)``: ``results`` maps strategy type
    to a metrics dict; ``recommendation`` is a human-readable string naming
    the strategy with the highest ``adaptability`` (F1 + F2).
    """
    results = {}
    combined_data = combined_data.copy().asfreq('D')
    combined_data[['accident_count','severity']] = combined_data[['accident_count','severity']].fillna(0)
    combined_data['strategy_type'] = combined_data['strategy_type'].apply(lambda x: x if isinstance(x, list) else [])

    acc_full = combined_data['accident_count']
    sev_full = combined_data['severity']

    # Longest history the trend models are allowed to fit on.
    max_fit_days = max(horizon + 60, MAX_PRE_DAYS)

    for strategy in strategy_types:
        has_strategy = combined_data['strategy_type'].apply(lambda x: strategy in x)
        if not has_strategy.any():
            continue
        candidate_dates = has_strategy[has_strategy].index
        intervention_date = None
        fit_start_dt = None
        # Prefer the earliest occurrence with at least MIN_PRE_DAYS of
        # pre-intervention history inside the fitting window.
        for dt in candidate_dates:
            fit_start_dt = max(acc_full.index.min(), dt - pd.Timedelta(days=max_fit_days))
            pre_hist = acc_full.loc[fit_start_dt:dt - pd.Timedelta(days=1)]
            if len(pre_hist) >= MIN_PRE_DAYS:
                intervention_date = dt
                break
        if intervention_date is None:
            # No occurrence has enough history; fall back to the first one.
            intervention_date = candidate_dates[0]
            fit_start_dt = max(acc_full.index.min(), intervention_date - pd.Timedelta(days=max_fit_days))

        acc = acc_full.loc[fit_start_dt:]
        sev = sev_full.loc[fit_start_dt:]
        # Shrink the evaluation window to the post-intervention data we
        # actually have, but never below one week.
        horizon_eff = max(7, min(horizon, len(acc.loc[intervention_date:]) ))

        glm_pred, svr_pred, residuals = fit_and_extrapolate(acc, intervention_date, days=horizon_eff)

        # Counterfactual preference: SVR, then GLM, then an ARIMA fallback.
        counter = None
        if svr_pred is not None:
            counter = svr_pred
        elif glm_pred is not None:
            counter = glm_pred
        else:
            try:
                arima_df = arima_forecast_with_grid_search(acc.loc[:intervention_date],
                                                           start_date=intervention_date + pd.Timedelta(days=1),
                                                           horizon=horizon_eff)
                counter = pd.Series(arima_df['forecast'].values, index=arima_df.index, name='cf_arima')
                residuals = (acc.reindex(counter.index) - counter)
            except Exception:
                counter = None
        if counter is None:
            continue

        count_eff, sev_eff, (F1, F2), state = evaluate_strategy_effectiveness(
            actual_series=acc,
            counterfactual_series=counter,
            severity_series=sev,
            strategy_date=intervention_date,
            window=horizon_eff
        )
        # effect_strength: mean (actual - counterfactual); negative is good.
        results[strategy] = {
            'effect_strength': float(residuals.dropna().mean()) if residuals is not None else 0.0,
            'adaptability': float(F1 + F2),
            'count_effective': bool(count_eff),
            'severity_effective': bool(sev_eff),
            'safety_state': state,
            'F1': float(F1),
            'F2': float(F2),
            'intervention_date': str(intervention_date.date())
        }

    # Secondary attempt with 14-day window if no results
    if not results:
        for strategy in strategy_types:
            has_strategy = combined_data['strategy_type'].apply(lambda x: strategy in x)
            if not has_strategy.any():
                continue
            # Use the first occurrence unconditionally with the full history.
            intervention_date = has_strategy[has_strategy].index[0]
            glm_pred, svr_pred, residuals = fit_and_extrapolate(acc_full, intervention_date, days=14)
            counter = None
            if svr_pred is not None:
                counter = svr_pred
            elif glm_pred is not None:
                counter = glm_pred
            else:
                try:
                    arima_df = arima_forecast_with_grid_search(acc_full.loc[:intervention_date],
                                                               start_date=intervention_date + pd.Timedelta(days=1),
                                                               horizon=14)
                    counter = pd.Series(arima_df['forecast'].values, index=arima_df.index, name='cf_arima')
                    residuals = (acc_full.reindex(counter.index) - counter)
                except Exception:
                    counter = None
            if counter is None:
                continue
            count_eff, sev_eff, (F1, F2), state = evaluate_strategy_effectiveness(
                actual_series=acc_full,
                counterfactual_series=counter,
                severity_series=sev_full,
                strategy_date=intervention_date,
                window=14
            )
            results[strategy] = {
                'effect_strength': float(residuals.dropna().mean()) if residuals is not None else 0.0,
                'adaptability': float(F1 + F2),
                'count_effective': bool(count_eff),
                'severity_effective': bool(sev_eff),
                'safety_state': state,
                'F1': float(F1),
                'F2': float(F2),
                'intervention_date': str(intervention_date.date())
            }

    best_strategy = max(results, key=lambda x: results[x]['adaptability']) if results else None
    recommendation = f"建议在{region}区域长期实施策略类型 {best_strategy}" if best_strategy else "无足够数据推荐策略"
    return results, recommendation
|
||||
|
||||
Reference in New Issue
Block a user