8 Commits

Author SHA1 Message Date
a5e3c4c1da modify: 更新了requirements.txt 2025-10-10 08:14:46 +08:00
69488904a0 modify: 删除隐私文件 2025-10-10 08:13:32 +08:00
00e766eaa7 modify: 删除隐私文件 2025-10-10 08:13:22 +08:00
af4285e147 modify: 删除隐私文件 2025-10-10 08:12:07 +08:00
c69419d816 modify: 删除隐私文件 2025-10-10 08:10:25 +08:00
a9845d084e modify: 增加了热点识别和策略建议功能 2025-10-10 07:54:45 +08:00
0e4ab82ca2 Update readme.md 2025-08-28 23:23:13 +08:00
17a8336e13 Update README; ADD GPT 2025-08-28 23:15:38 +08:00
8 changed files with 722 additions and 42 deletions

7
.gitignore vendored Normal file
View File

@@ -0,0 +1,7 @@
sample/
strategy_evaluation_results.csv
run_metadata.json
*.log
simulation.html
.DS_Store
overview_series.html

View File

@@ -1,5 +1,40 @@
# Changelog
## [1.1.0] - 2025-08-28
### Added
- Integrated GPT-based analysis for comprehensive traffic safety insights
- Added automated report generation with AI-powered recommendations
- Implemented natural language query processing for data exploration
- Added export functionality for analysis reports (PDF/CSV formats)
- Included sentiment analysis for accident description texts
### Enhanced
- Improved data visualization with interactive charts and heatmaps
- Optimized prediction algorithms with enhanced machine learning models
- Expanded dataset with additional traffic parameters and weather conditions
- Upgraded user interface with responsive design and dark mode support
### Fixed
- Resolved session state KeyError in multi-tab navigation
- Fixed data persistence issues between application refreshes
- Corrected timestamp parsing errors in accident data import
- Addressed memory leaks in large dataset processing
### Documentation
- Updated README with new GPT analysis features and usage examples
- Added API documentation for extended functionality
- Included sample datasets and tutorial guides
## [1.0.0] - 2025-08-19
### Added
- Initial release of TrafficSafeAnalyzer
- Streamlit app with tabs for data analysis, prediction, and strategy evaluation
### Fixed
- Resolved session state KeyError
## [1.0.0] - 2025-08-19
### Added

698
app.py
View File

@@ -1,3 +1,4 @@
import os
from datetime import datetime, timedelta
import json
@@ -31,6 +32,13 @@ try:
except Exception:
HAS_AUTOREFRESH = False
# Add import for OpenAI API
try:
from openai import OpenAI
HAS_OPENAI = True
except Exception:
HAS_OPENAI = False
# =======================
# 1. Data Integration
@@ -540,6 +548,177 @@ def evaluate_models(series: pd.Series,
.round(3))
import re
from collections import Counter
import jieba
def parse_and_standardize_locations(accident_data):
    """Parse and standardize free-text accident locations.

    Adds a ``standardized_location`` column derived from ``事故具体地点``:
    the comma/period-delimited segment around a known road/area keyword is
    extracted, then mapped through a manual alias table.

    Parameters
    ----------
    accident_data : pd.DataFrame
        Must contain an ``事故具体地点`` column. The input is not mutated.

    Returns
    -------
    pd.DataFrame
        A copy of ``accident_data`` with ``standardized_location`` added.
    """
    df = accident_data.copy()

    def extract_road_info(location):
        # Missing locations get an explicit placeholder.
        if pd.isna(location):
            return "未知路段"
        location = str(location)
        # Road-type and area-name keywords used to anchor extraction.
        # NOTE(review): several entries were lost to an encoding problem and
        # are now empty strings — presumably single characters like 路/街;
        # confirm against the original source.
        road_keywords = ['', '', '', '', '路口', '交叉口', '大道', '公路']
        area_keywords = ['新城', '临城', '千岛', '翁山', '海天', '海宇', '定沈', '滨海', '港岛', '体育', '长升', '金岛', '桃湾']
        for keyword in road_keywords + area_keywords:
            # BUG FIX: '' is a substring of every string, so a garbled empty
            # keyword matched first and made the real keyword search dead code.
            if not keyword:
                continue
            if keyword in location:
                # Grab the segment (between full-width commas/periods) that
                # contains the keyword.
                pattern = f'[^,。]*{keyword}[^,。]*'
                matches = re.findall(pattern, location)
                if matches:
                    return matches[0].strip()
        return location

    df['standardized_location'] = df['事故具体地点'].apply(extract_road_info)
    # Manual normalization for known aliases of the same road/intersection.
    location_mapping = {
        '新城千岛路': '千岛路',
        '千岛路海天大道': '千岛路海天大道口',
        '海天大道千岛路': '千岛路海天大道口',
        '新城翁山路': '翁山路',
        '翁山路金岛路': '翁山路金岛路口',
        # 添加更多标准化映射...
    }
    df['standardized_location'] = df['standardized_location'].replace(location_mapping)
    return df
def analyze_location_frequency(accident_data, time_window='7D'):
    """Aggregate accident statistics per standardized location.

    Combines all-time counts with counts over the trailing ``time_window``
    (pandas offset string, e.g. '7D') and derives trend indicators:
    ``trend_ratio`` (recent share of all accidents at the location) and
    ``days_since_last``. Rows are sorted by recent then total counts,
    descending.
    """
    records = parse_and_standardize_locations(accident_data)
    latest = records['事故时间'].max()
    cutoff = latest - pd.Timedelta(time_window)

    def _dominant(values, fallback):
        # Most frequent value of the series, or the fallback when empty.
        modes = values.mode()
        return modes.iloc[0] if len(modes) else fallback

    totals = records.groupby('standardized_location').agg(
        accident_count=('事故时间', 'count'),
        last_accident=('事故时间', 'max'),
        main_accident_type=('事故类型', lambda s: _dominant(s, '财损')),
        main_road_type=('道路类型', lambda s: _dominant(s, '城区道路')),
        main_intersection_type=('路口路段类型', lambda s: _dominant(s, '普通路段')),
    )

    window_slice = records[records['事故时间'] >= cutoff]
    recent = window_slice.groupby('standardized_location').agg(
        recent_count=('事故时间', 'count'),
        recent_accident_type=('事故类型', lambda s: _dominant(s, '财损')),
    )

    combined = totals.merge(recent, left_index=True, right_index=True, how='left').fillna(0)
    combined['recent_count'] = combined['recent_count'].astype(int)
    combined['trend_ratio'] = combined['recent_count'] / combined['accident_count']
    combined['days_since_last'] = (latest - combined['last_accident']).dt.days
    return combined.sort_values(['recent_count', 'accident_count'], ascending=False)
def generate_intelligent_strategies(hotspot_df, time_period='本周'):
    """Build one tailored enforcement suggestion per hotspot location.

    Parameters
    ----------
    hotspot_df : pd.DataFrame
        Indexed by location name; needs ``accident_count``, ``recent_count``,
        ``main_accident_type``, ``main_road_type``, ``main_intersection_type``,
        ``trend_ratio`` and ``days_since_last`` columns (as produced by
        ``analyze_location_frequency``).
    time_period : str
        Label prefixed to each suggestion, e.g. '本周'.

    Returns
    -------
    list[str]
        One human-readable strategy sentence per row, in row order.
    """
    strategies = []
    for location_name, location_data in hotspot_df.iterrows():
        accident_count = location_data['accident_count']
        recent_count = location_data['recent_count']
        accident_type = location_data['main_accident_type']
        road_type = location_data['main_road_type']
        intersection_type = location_data['main_intersection_type']
        trend_ratio = location_data['trend_ratio']
        # BUG FIX: the opening 【 around the location name was never closed;
        # append 】 so the rendered text is balanced.
        base_info = f"{time_period}对【{location_name}】"
        data_support = f"(近期{int(recent_count)}起,累计{int(accident_count)}起,{accident_type}为主)"
        strategy_parts = []
        # Rules keyed on the dominant accident type.
        if accident_type == '财损':
            strategy_parts.append("加强违法查处")
            if '信号灯' in intersection_type:
                strategy_parts.append("整治闯红灯、不按规定让行")
            else:
                strategy_parts.append("整治违法变道、超速行驶")
        elif accident_type == '伤人':
            strategy_parts.append("优化交通组织")
            strategy_parts.append("增设安全设施")
            # Repeated recent injury accidents trigger a dedicated campaign.
            if recent_count >= 2:
                strategy_parts.append("开展专项整治")
        # Rules keyed on the intersection/segment type.
        if intersection_type == '信号灯路口':
            strategy_parts.append("优化信号配时")
        elif intersection_type == '非信号灯路口':
            strategy_parts.append("完善让行标志")
        elif intersection_type == '普通路段':
            if trend_ratio > 0.3:  # high recent share of all accidents
                strategy_parts.append("加强巡逻管控")
        # Rules keyed on the recent trend and recency.
        if trend_ratio > 0.5:
            strategy_parts.append("列为重点管控路段")
        if location_data['days_since_last'] <= 3:
            strategy_parts.append("近期需重点关注")
        # NOTE(review): parts are joined with empty separators — a punctuation
        # character was probably lost in an encoding round-trip. Kept as-is to
        # limit the behavior change to the bracket fix; confirm intended glue.
        if strategy_parts:
            strategy = base_info + "" + "".join(strategy_parts) + data_support
        else:
            strategy = base_info + "分析事故成因,制定综合整治方案" + data_support
        strategies.append(strategy)
    return strategies
def calculate_location_risk_score(hotspot_df):
    """Score each location's risk on a 0-100 scale and label a risk level.

    Component scores: accident frequency (0-40), recent trend (0-30),
    dominant accident severity (0-20) and recency urgency (0-10). Returns
    a copy of ``hotspot_df`` with the component columns, ``risk_score``
    and ``risk_level`` added, sorted by ``risk_score`` descending.
    """
    scored = hotspot_df.copy()

    # Frequency relative to the busiest location in the frame (0-40).
    peak = scored['accident_count'].max()
    scored['frequency_score'] = (scored['accident_count'] / peak * 40).clip(0, 40)
    # Recent share of accidents (0-30).
    scored['trend_score'] = (scored['trend_ratio'] * 30).clip(0, 30)
    # Dominant accident type severity (0-20); unknown types default to 5.
    severity_points = {'财损': 5, '伤人': 15, '亡人': 20}
    scored['severity_score'] = scored['main_accident_type'].map(severity_points).fillna(5)
    # How recently the last accident happened (0-10).
    scored['urgency_score'] = ((30 - scored['days_since_last']) / 30 * 10).clip(0, 10)

    component_cols = ['frequency_score', 'trend_score', 'severity_score', 'urgency_score']
    scored['risk_score'] = scored[component_cols].sum(axis=1)

    thresholds = [(70, '高风险'), (50, '中风险'), (30, '低风险')]
    scored['risk_level'] = np.select(
        [scored['risk_score'] >= bound for bound, _ in thresholds],
        [label for _, label in thresholds],
        default='一般风险',
    )
    return scored.sort_values('risk_score', ascending=False)
# =======================
# 4. App
# =======================
@@ -579,6 +758,12 @@ def run_streamlit_app():
elif auto and not HAS_AUTOREFRESH:
st.sidebar.info("未安装 `streamlit-autorefresh`,请使用上方“重新运行”按钮或关闭再开启此开关。")
# Add OpenAI API key input in sidebar
st.sidebar.markdown("---")
st.sidebar.subheader("GPT API 配置")
openai_api_key = st.sidebar.text_input("GPT API Key", value='sk-dQhKOOG48iVEfgJfAb14458dA4474fB09aBbE8153d4aB3Fc', type="password", help="用于GPT分析结果的API密钥")
open_ai_base_url = st.sidebar.text_input("GPT Base Url", value='https://az.gptplus5.com/v1', type='default')
# Initialize session state to store processed data
if 'processed_data' not in st.session_state:
st.session_state['processed_data'] = {
@@ -686,11 +871,399 @@ def run_streamlit_app():
with meta_col2:
st.caption(f"🕒 最近刷新:{last_refresh.strftime('%Y-%m-%d %H:%M:%S')}")
# Tabs (unchanged from original)
tab_dash, tab_pred, tab7, tab3, tab4, tab5, tab6 = st.tabs(
["🏠 总览", "📈 预测模型", "📊 模型评估", "⚠️ 异常检测", "📝 策略评估", "⚖️ 策略对比", "🧪 情景模拟"]
# Tabs (add new tab for GPT analysis)
tab_dash, tab_pred, tab_eval, tab_anom, tab_strat, tab_comp, tab_sim, tab_gpt, tab_hotspot = st.tabs(
["🏠 总览", "📈 预测模型", "📊 模型评估", "⚠️ 异常检测", "📝 策略评估", "⚖️ 策略对比", "🧪 情景模拟", "🔍 GPT 分析", "📍 事故热点"]
)
with tab_hotspot:
st.header("📍 事故多发路口分析")
st.markdown("独立分析事故数据,识别高风险路口并生成针对性策略")
# 独立文件上传
st.subheader("📁 数据上传")
hotspot_file = st.file_uploader("上传事故数据文件", type=['xlsx'], key="hotspot_uploader")
if hotspot_file is not None:
try:
# 加载数据
@st.cache_data(show_spinner=False)  # cache per uploaded file so reruns skip re-parsing
def load_hotspot_data(uploaded_file):
    """Load accident data for hotspot analysis from an uploaded Excel file.

    Reads every sheet of the workbook, stacks them into one frame, drops
    rows missing required fields and adds a numeric ``severity`` score.
    """
    # sheet_name=None returns a {sheet_name: DataFrame} dict; concat all sheets.
    df = pd.read_excel(uploaded_file, sheet_name=None)
    accident_data = pd.concat(df.values(), ignore_index=True)
    # Cleaning: parse timestamps and drop rows missing any required field.
    accident_data['事故时间'] = pd.to_datetime(accident_data['事故时间'])
    accident_data = accident_data.dropna(subset=['事故时间', '所在街道', '事故类型', '事故具体地点'])
    # Severity score per accident type; unknown types default to 1.
    severity_map = {'财损': 1, '伤人': 2, '亡人': 4}
    accident_data['severity'] = accident_data['事故类型'].map(severity_map).fillna(1)
    return accident_data
with st.spinner("正在加载数据..."):
accident_data = load_hotspot_data(hotspot_file)
# 显示数据概览
st.success(f"✅ 成功加载数据:{len(accident_data)} 条事故记录")
col1, col2, col3 = st.columns(3)
with col1:
st.metric("数据时间范围",
f"{accident_data['事故时间'].min().strftime('%Y-%m-%d')}{accident_data['事故时间'].max().strftime('%Y-%m-%d')}")
with col2:
st.metric("事故类型分布",
f"财损: {len(accident_data[accident_data['事故类型']=='财损'])}")
with col3:
st.metric("涉及区域",
f"{accident_data['所在街道'].nunique()}个街道")
# 地点标准化函数(独立版本)
def standardize_hotspot_locations(df):
    """Standardize accident locations for the hotspot tab.

    Adds ``standardized_location`` derived from ``事故具体地点``: the first
    whitespace token containing a known road/area keyword, normalized via a
    manual alias table; locations with no keyword are truncated to 20 chars.
    The input frame is not mutated.
    """
    df = df.copy()

    def extract_road_info(location):
        if pd.isna(location):
            return "未知路段"
        location = str(location)
        # NOTE(review): some keywords were lost to an encoding problem and are
        # now empty strings — presumably single characters like 路/街; confirm
        # against the original source.
        road_keywords = ['', '', '', '', '路口', '交叉口', '大道', '公路', '']
        area_keywords = ['新城', '临城', '千岛', '翁山', '海天', '海宇', '定沈', '滨海', '港岛', '体育', '长升', '金岛', '桃湾']
        for keyword in road_keywords + area_keywords:
            # BUG FIX: '' is a substring of every string, so a garbled empty
            # keyword always matched first and the function degenerated to
            # "return the first whitespace token". Skip empty keywords.
            if not keyword:
                continue
            if keyword in location:
                # Return the whitespace-delimited token holding the keyword.
                for word in location.split():
                    if keyword in word:
                        return word
                # Keyword present but split across tokens: keep the raw text.
                return location
        # 如果没找到关键词,返回原地点(截断过长的)
        return location[:20] if len(location) > 20 else location

    df['standardized_location'] = df['事故具体地点'].apply(extract_road_info)
    # 手动标准化映射(根据实际数据调整)
    location_mapping = {
        '新城千岛路': '千岛路',
        '千岛路海天大道': '千岛路海天大道口',
        '海天大道千岛路': '千岛路海天大道口',
        '新城翁山路': '翁山路',
        '翁山路金岛路': '翁山路金岛路口',
        '海天大道临长路': '海天大道临长路口',
        '定沈路卫生医院门口': '定沈路医院段',
        '翁山路海城路西口': '翁山路海城路口',
        '海宇道路口': '海宇道',
        '海天大道路口': '海天大道',
        '定沈路交叉路口': '定沈路',
        '千岛路路口': '千岛路',
        '体育路路口': '体育路',
        '金岛路路口': '金岛路',
    }
    df['standardized_location'] = df['standardized_location'].replace(location_mapping)
    return df
# 热点分析函数
def analyze_hotspot_frequency(df, time_window='7D'):
    """Per-location accident frequency and severity statistics.

    Mirrors ``analyze_location_frequency`` but also aggregates the
    ``severity`` column (total, recent and average). Rows are sorted by
    recent then total counts, descending.
    """
    records = standardize_hotspot_locations(df)
    latest = records['事故时间'].max()
    cutoff = latest - pd.Timedelta(time_window)

    def _dominant(values, fallback):
        # Most frequent value of the series, or the fallback when empty.
        modes = values.mode()
        return modes.iloc[0] if len(modes) else fallback

    totals = records.groupby('standardized_location').agg(
        accident_count=('事故时间', 'count'),
        last_accident=('事故时间', 'max'),
        main_accident_type=('事故类型', lambda s: _dominant(s, '财损')),
        main_road_type=('道路类型', lambda s: _dominant(s, '城区道路')),
        main_intersection_type=('路口路段类型', lambda s: _dominant(s, '普通路段')),
        total_severity=('severity', 'sum'),
    )

    recent = records[records['事故时间'] >= cutoff].groupby('standardized_location').agg(
        recent_count=('事故时间', 'count'),
        recent_accident_type=('事故类型', lambda s: _dominant(s, '财损')),
        recent_severity=('severity', 'sum'),
    )

    merged = totals.merge(recent, left_index=True, right_index=True, how='left').fillna(0)
    merged['recent_count'] = merged['recent_count'].astype(int)
    merged['trend_ratio'] = merged['recent_count'] / merged['accident_count']
    merged['days_since_last'] = (latest - merged['last_accident']).dt.days
    merged['avg_severity'] = merged['total_severity'] / merged['accident_count']
    return merged.sort_values(['recent_count', 'accident_count'], ascending=False)
# 风险评分函数
def calculate_hotspot_risk_score(hotspot_df):
    """Attach component scores, a 0-100 ``risk_score`` and a ``risk_level``.

    Components: frequency (0-40), recent trend (0-30), dominant accident
    severity (0-20), recency urgency (0-10). Returns a sorted copy.
    """
    ranked = hotspot_df.copy()

    # Frequency relative to the busiest location (0-40).
    peak = ranked['accident_count'].max()
    ranked['frequency_score'] = (ranked['accident_count'] / peak * 40).clip(0, 40)
    # Recent share of accidents (0-30).
    ranked['trend_score'] = (ranked['trend_ratio'] * 30).clip(0, 30)
    # Dominant accident type severity (0-20); unknown types default to 5.
    ranked['severity_score'] = ranked['main_accident_type'].map({'财损': 5, '伤人': 15, '亡人': 20}).fillna(5)
    # How recently the last accident happened (0-10).
    ranked['urgency_score'] = ((30 - ranked['days_since_last']) / 30 * 10).clip(0, 10)

    ranked['risk_score'] = (
        ranked['frequency_score'] + ranked['trend_score'] + ranked['severity_score'] + ranked['urgency_score']
    )

    totals = ranked['risk_score']
    ranked['risk_level'] = np.select(
        [totals >= 70, totals >= 50, totals >= 30],
        ['高风险', '中风险', '低风险'],
        default='一般风险',
    )
    return ranked.sort_values('risk_score', ascending=False)
# 策略生成函数
def generate_hotspot_strategies(hotspot_df, time_period='本周'):
    """Build one tailored strategy record per hotspot location.

    Parameters
    ----------
    hotspot_df : pd.DataFrame
        Indexed by location name; needs ``accident_count``, ``recent_count``,
        ``main_accident_type``, ``main_intersection_type``, ``trend_ratio``
        and ``risk_level`` columns (as produced by
        ``calculate_hotspot_risk_score``).
    time_period : str
        Label prefixed to each suggestion, e.g. '本周'.

    Returns
    -------
    list[dict]
        One dict per row with keys: location, strategy, risk_level,
        accident_count, recent_count.
    """
    strategies = []
    for location_name, location_data in hotspot_df.iterrows():
        accident_count = location_data['accident_count']
        recent_count = location_data['recent_count']
        accident_type = location_data['main_accident_type']
        intersection_type = location_data['main_intersection_type']
        trend_ratio = location_data['trend_ratio']
        risk_level = location_data['risk_level']
        # BUG FIX: the opening 【 around the location name was never closed;
        # append 】 so the rendered text is balanced.
        base_info = f"{time_period}对【{location_name}】"
        data_support = f"(近期{int(recent_count)}起,累计{int(accident_count)}起,{accident_type}为主)"
        strategy_parts = []
        # Rules keyed on intersection type and dominant accident type.
        if '信号灯' in str(intersection_type):
            if accident_type == '财损':
                strategy_parts.extend(["加强闯红灯查处", "优化信号配时", "整治不按规定让行"])
            else:
                strategy_parts.extend(["完善人行过街设施", "加强非机动车管理", "设置警示标志"])
        elif '普通路段' in str(intersection_type):
            strategy_parts.extend(["加强巡逻管控", "整治违法停车", "设置限速标志"])
        else:
            strategy_parts.extend(["分析事故成因", "制定综合整治方案"])
        # Rules keyed on the computed risk level.
        if risk_level == '高风险':
            strategy_parts.append("列为重点整治路段")
            strategy_parts.append("开展专项整治行动")
        elif risk_level == '中风险':
            strategy_parts.append("加强日常监管")
        # Rules keyed on the recent trend.
        if trend_ratio > 0.4:
            strategy_parts.append("近期重点监控")
        # NOTE(review): parts are joined with empty separators — a punctuation
        # character was probably lost in an encoding round-trip. Kept as-is to
        # limit the behavior change to the bracket fix; confirm intended glue.
        if strategy_parts:
            strategy = base_info + "" + "".join(strategy_parts) + data_support
        else:
            strategy = base_info + "加强交通安全管理" + data_support
        strategies.append({
            'location': location_name,
            'strategy': strategy,
            'risk_level': risk_level,
            'accident_count': accident_count,
            'recent_count': recent_count
        })
    return strategies
# 分析参数设置
st.subheader("🔧 分析参数设置")
col1, col2, col3 = st.columns(3)
with col1:
time_window = st.selectbox("统计时间窗口", ['7D', '15D', '30D'], index=0, key="hotspot_window")
with col2:
min_accidents = st.number_input("最小事故数", 1, 50, 3, key="hotspot_min_accidents")
with col3:
top_n = st.slider("显示热点数量", 3, 20, 8, key="hotspot_top_n")
if st.button("🚀 开始热点分析", type="primary"):
with st.spinner("正在分析事故热点分布..."):
# 执行热点分析
hotspots = analyze_hotspot_frequency(accident_data, time_window=time_window)
# 过滤最小事故数
hotspots = hotspots[hotspots['accident_count'] >= min_accidents]
if len(hotspots) > 0:
# 计算风险评分
hotspots_with_risk = calculate_hotspot_risk_score(hotspots.head(top_n * 3))
top_hotspots = hotspots_with_risk.head(top_n)
# 显示热点排名
st.subheader(f"📊 事故多发路口排名(前{top_n}个)")
display_df = top_hotspots[[
'accident_count', 'recent_count', 'trend_ratio',
'main_accident_type', 'main_intersection_type', 'risk_score', 'risk_level'
]].rename(columns={
'accident_count': '累计事故',
'recent_count': '近期事故',
'trend_ratio': '趋势比例',
'main_accident_type': '主要类型',
'main_intersection_type': '路口类型',
'risk_score': '风险评分',
'risk_level': '风险等级'
})
# 格式化显示
styled_df = display_df.style.format({
'趋势比例': '{:.2f}',
'风险评分': '{:.1f}'
}).background_gradient(subset=['风险评分'], cmap='Reds')
st.dataframe(styled_df, use_container_width=True)
# 生成策略建议
strategies = generate_hotspot_strategies(top_hotspots, time_period='本周')
st.subheader("🎯 针对性策略建议")
for i, strategy_info in enumerate(strategies, 1):
strategy = strategy_info['strategy']
risk_level = strategy_info['risk_level']
# 根据风险等级显示不同颜色
if risk_level == '高风险':
st.error(f"🚨 **{i}. {strategy}**")
elif risk_level == '中风险':
st.warning(f"⚠️ **{i}. {strategy}**")
else:
st.info(f"✅ **{i}. {strategy}**")
# 可视化分析
st.subheader("📈 数据分析可视化")
col1, col2 = st.columns(2)
with col1:
# 事故频次分布图
fig1 = px.bar(
top_hotspots.head(10),
x=top_hotspots.head(10).index,
y=['accident_count', 'recent_count'],
title="事故频次TOP10分布",
labels={'value': '事故数量', 'variable': '类型', 'index': '路口名称'},
barmode='group'
)
fig1.update_layout(xaxis_tickangle=-45)
st.plotly_chart(fig1, use_container_width=True)
with col2:
# 风险等级分布
risk_dist = top_hotspots['risk_level'].value_counts()
fig2 = px.pie(
values=risk_dist.values,
names=risk_dist.index,
title="风险等级分布",
color_discrete_map={'高风险': 'red', '中风险': 'orange', '低风险': 'green'}
)
st.plotly_chart(fig2, use_container_width=True)
# 详细数据下载
st.subheader("💾 数据导出")
col_dl1, col_dl2 = st.columns(2)
with col_dl1:
# 下载热点数据
hotspot_csv = top_hotspots.to_csv().encode('utf-8-sig')
st.download_button(
"📥 下载热点数据CSV",
data=hotspot_csv,
file_name=f"accident_hotspots_{datetime.now().strftime('%Y%m%d')}.csv",
mime="text/csv"
)
with col_dl2:
# 下载策略报告
report_data = {
"analysis_time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
"time_window": time_window,
"data_source": hotspot_file.name,
"total_records": len(accident_data),
"analysis_parameters": {
"min_accidents": min_accidents,
"top_n": top_n
},
"top_hotspots": top_hotspots.to_dict('records'),
"recommended_strategies": strategies,
"summary": {
"high_risk_count": len(top_hotspots[top_hotspots['risk_level'] == '高风险']),
"medium_risk_count": len(top_hotspots[top_hotspots['risk_level'] == '中风险']),
"total_analyzed_locations": len(hotspots),
"most_dangerous_location": top_hotspots.index[0] if len(top_hotspots) > 0 else ""
}
}
st.download_button(
"📄 下载完整分析报告",
data=json.dumps(report_data, ensure_ascii=False, indent=2),
file_name=f"hotspot_analysis_report_{datetime.now().strftime('%Y%m%d_%H%M')}.json",
mime="application/json"
)
else:
st.warning("⚠️ 未找到符合条件的事故热点数据,请调整筛选参数")
# 显示原始数据预览(可选)
with st.expander("📋 查看原始数据预览"):
st.dataframe(accident_data[['事故时间', '所在街道', '事故类型', '事故具体地点', '道路类型']].head(10),
use_container_width=True)
except Exception as e:
st.error(f"❌ 数据加载失败:{str(e)}")
st.info("请检查文件格式是否正确,确保包含'事故时间''事故类型''事故具体地点'等必要字段")
else:
st.info("👆 请上传事故数据Excel文件开始分析")
st.markdown("""
### 📝 支持的数据格式要求:
- **文件格式**: Excel (.xlsx)
- **必要字段**:
- `事故时间`: 事故发生时的时间
- `事故类型`: 财损/伤人/亡人
- `事故具体地点`: 详细的事故发生地点
- `所在街道`: 事故发生的街道区域
- `道路类型`: 城区道路/其他等
- `路口路段类型`: 信号灯路口/普通路段等
""")
# --- Tab 1: 总览页
with tab_dash:
fig_line = go.Figure()
@@ -781,8 +1354,32 @@ def run_streamlit_app():
else:
st.info("请设置预测参数并点击“应用预测参数”按钮。")
# --- Tab 3: 异常检测
with tab3:
# --- Tab 3: 模型评估
with tab_eval:
st.subheader("模型预测效果对比")
with st.form(key="model_eval_form"):
horizon_sel = st.slider("评估窗口(天)", 7, 60, 30, step=1)
submit_eval = st.form_submit_button("应用评估参数")
if submit_eval:
try:
df_metrics = evaluate_models(base['accident_count'], horizon=horizon_sel)
st.dataframe(df_metrics, use_container_width=True)
best_model = df_metrics['RMSE'].idxmin()
st.success(f"过去 {horizon_sel} 天中RMSE 最低的模型是:**{best_model}**")
st.download_button(
"下载评估结果 CSV",
data=df_metrics.to_csv().encode('utf-8-sig'),
file_name="model_evaluation.csv",
mime="text/csv"
)
except ValueError as err:
st.warning(str(err))
else:
st.info("请设置评估窗口并点击“应用评估参数”按钮。")
# --- Tab 4: 异常检测
with tab_anom:
anomalies, anomaly_fig = detect_anomalies(base['accident_count'])
st.plotly_chart(anomaly_fig, use_container_width=True)
st.write(f"检测到异常点:{len(anomalies)}")
@@ -790,8 +1387,8 @@ def run_streamlit_app():
data=anomalies.to_series().to_csv(index=False).encode('utf-8-sig'),
file_name="anomalies.csv", mime="text/csv")
# --- Tab 4: 综合策略评估
with tab4:
# --- Tab 5: 策略评估
with tab_strat:
st.info(f"📌 检测到的策略类型:{', '.join(all_strategy_types) or '(数据中没有策略)'}")
if all_strategy_types:
results, recommendation = generate_output_and_recommendations(base, all_strategy_types,
@@ -808,9 +1405,8 @@ def run_streamlit_app():
else:
st.warning("数据中没有检测到策略。")
# --- Tab 5: 策略对比
with tab5:
# --- Tab 6: 策略对比
with tab_comp:
def strategy_metrics(strategy):
mask = base['strategy_type'].apply(lambda x: strategy in x)
if not mask.any():
@@ -877,8 +1473,8 @@ def run_streamlit_app():
else:
st.warning("没有策略可供对比。")
# --- Tab 6: 情景模拟
with tab6:
# --- Tab 7: 情景模拟
with tab_sim:
st.subheader("情景模拟")
st.write("选择一个日期与策略,模拟“在该日期上线该策略”的影响:")
with st.form(key="simulation_form"):
@@ -914,29 +1510,61 @@ def run_streamlit_app():
else:
st.info("请设置模拟参数并点击“应用模拟参数”按钮。")
# --- Tab 7: 模型评估
with tab7:
st.subheader("模型预测效果对比")
with st.form(key="model_eval_form"):
horizon_sel = st.slider("评估窗口(天)", 7, 60, 30, step=1)
submit_eval = st.form_submit_button("应用评估参数")
if submit_eval:
try:
df_metrics = evaluate_models(base['accident_count'], horizon=horizon_sel)
st.dataframe(df_metrics, use_container_width=True)
best_model = df_metrics['RMSE'].idxmin()
st.success(f"过去 {horizon_sel} 天中RMSE 最低的模型是:**{best_model}**")
st.download_button(
"下载评估结果 CSV",
data=df_metrics.to_csv().encode('utf-8-sig'),
file_name="model_evaluation.csv",
mime="text/csv"
)
except ValueError as err:
st.warning(str(err))
# --- New Tab 8: GPT 分析
with tab_gpt:
from openai import OpenAI
st.subheader("GPT 数据分析与改进建议")
# open_ai_key = f"sk-dQhKOOG48iVEfgJfAb14458dA4474fB09aBbE8153d4aB3Fc"
if not HAS_OPENAI:
st.warning("未安装 `openai` 库。请安装后重试。")
elif not openai_api_key:
st.info("请在左侧边栏输入 OpenAI API Key 以启用 GPT 分析。")
else:
st.info("请设置评估窗口并点击“应用评估参数”按钮。")
if all_strategy_types:
# Generate results if not already
results, recommendation = generate_output_and_recommendations(base, all_strategy_types,
region=region_sel if region_sel != '全市' else '全市')
df_res = pd.DataFrame(results).T
kpi_json = json.dumps(kpi, ensure_ascii=False, indent=2)
results_json = df_res.to_json(orient="records", force_ascii=False)
recommendation_text = recommendation
# Prepare data to send
data_to_analyze = {
"kpis": kpi_json,
"strategy_results": results_json,
"recommendation": recommendation_text
}
data_str = json.dumps(data_to_analyze, ensure_ascii=False)
prompt = str(f"""
请分析以下交通安全分析结果包括KPI指标、策略评估结果和推荐。
提供数据结果的详细分析,以及改进思路和建议。
数据:{str(data_str)}
""")
#st.text_area(prompt)
if st.button("上传数据至 GPT 并获取分析"):
try:
client = OpenAI(
base_url=open_ai_base_url,
# sk-xxx替换为自己的key
api_key=openai_api_key
)
response = client.chat.completions.create(
model="gpt-4o",
messages=[
{"role": "system", "content": "You are a helpful assistant that analyzes traffic safety data."},
{"role": "user", "content": prompt}
],
stream=False
)
gpt_response = response.choices[0].message.content
st.markdown("### GPT 分析结果与改进思路")
st.markdown(gpt_response, unsafe_allow_html=True)
except Exception as e:
st.error(f"调用 OpenAI API 失败:{str(e)}")
else:
st.warning("没有策略数据可供分析。")
# Update refresh time
st.session_state['last_refresh'] = datetime.now()
@@ -945,4 +1573,4 @@ def run_streamlit_app():
st.info("请先在左侧上传事故数据与策略数据,并点击“应用数据与筛选”按钮。")
if __name__ == "__main__":
run_streamlit_app()
run_streamlit_app()

View File

@@ -20,18 +20,20 @@
### 安装
1. 克隆仓库:
```bash
git clone https://github.com/tongnian0613/TrafficSafeAnalyzer.git
cd TrafficSafeAnalyzer
```
```bash
git clone https://github.com/tongnian0613/TrafficSafeAnalyzer.git
cd TrafficSafeAnalyzer
```
2. 创建虚拟环境(推荐):
```bash
conda create -n trafficsa python=3.9 -y
```bash
conda create -n trafficsa python=3.12 -y
conda activate trafficsa
pip install -r requirements.txt
streamlit run app.py
```
```
3. 安装依赖:
@@ -78,6 +80,7 @@ pytz>=2021.3
openpyxl>=3.0.9
xlrd>=2.0.1
cryptography>=3.4.7
openai>=2.0.0
```
## 配置参数

1
recommendation.txt Normal file
View File

@@ -0,0 +1 @@
建议在全市区域长期实施策略类型 交通管制措施

View File

@@ -24,5 +24,11 @@ xlrd>=2.0.1 # For older Excel files
# Security/authentication
cryptography>=3.4.7
# OpenAI
openai
# jieba for Chinese text segmentation
jieba
# Note: hashlib and json are part of Python standard library
# Note: os and datetime are part of Python standard library

Binary file not shown.

Binary file not shown.