#!/bin/bash
# Batch keyword-matching script.
#
# Invokes keyword_matcher.py (located next to this script) once for every
# .xlsx file in data/pho_analysis_merged/ and writes each result to
# data/output/<name>_matched.xlsx.
#
# The script's exit status is 1 when the input directory is missing and 0
# otherwise; per-file matcher failures are counted and reported in the
# summary but do not change the exit status.

set -euo pipefail

# Directory containing this script, and the project root one level above it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"

# Directory layout.
INPUT_DIR="$PROJECT_DIR/data/pho_analysis_merged"
OUTPUT_DIR="$PROJECT_DIR/data/output"
KEYWORDS_FILE="$PROJECT_DIR/data/keywords/keywords_all.xlsx"

# ANSI colour codes. Stored as real escape bytes ($'...') so messages can be
# printed with printf '%s' and backslashes in file names are never interpreted.
GREEN=$'\033[0;32m'
YELLOW=$'\033[1;33m'
RED=$'\033[0;31m'
NC=$'\033[0m' # No Color

echo "=============================================="
echo "批量关键词匹配"
echo "=============================================="
echo "输入目录: $INPUT_DIR"
echo "输出目录: $OUTPUT_DIR"
echo "关键词文件: $KEYWORDS_FILE"
echo ""

# The input directory is mandatory.
if [[ ! -d "$INPUT_DIR" ]]; then
  printf '%s\n' "${RED}错误: 输入目录不存在: $INPUT_DIR${NC}" >&2
  exit 1
fi

# The keywords file is optional: when it is missing, clear the variable so
# that the -k option is omitted from the matcher invocation below.
if [[ ! -f "$KEYWORDS_FILE" ]]; then
  printf '%s\n' "${YELLOW}警告: 关键词文件不存在: $KEYWORDS_FILE${NC}"
  echo "将使用默认关键词文件"
  KEYWORDS_FILE=""
fi

# Make sure the output directory exists.
mkdir -p "$OUTPUT_DIR"

# Counters for the final summary.
total=0
success=0
failed=0

# Collect candidate .xlsx files. Without nullglob an unmatched pattern stays
# as the literal string, which the -e test below detects.
files=("$INPUT_DIR"/*.xlsx)

if [[ ! -e "${files[0]}" ]]; then
  printf '%s\n' "${YELLOW}没有找到 xlsx 文件${NC}"
  exit 0
fi

# Count regular files only.
# Counters use var=$((var + 1)) rather than ((var++)): the latter returns a
# non-zero status when the old value is 0, which aborts the script under set -e.
for f in "${files[@]}"; do
  if [[ -f "$f" ]]; then
    total=$((total + 1))
  fi
done

echo "找到 $total 个文件待处理"
echo "----------------------------------------------"

# Process each file in turn.
current=0
for input_file in "${files[@]}"; do
  # Skip anything that matched the glob but is not a regular file.
  [[ -f "$input_file" ]] || continue

  current=$((current + 1))

  # File name without directory or .xlsx extension.
  filename=$(basename "$input_file" .xlsx)
  output_file="$OUTPUT_DIR/${filename}_matched.xlsx"

  printf '\n%s\n' "[$current/$total] 处理: $filename"

  # Build the command as an argument array so that paths containing spaces,
  # quotes or other shell metacharacters reach the matcher unchanged
  # (no eval, no re-parsing of a command string).
  cmd=(python3 "$SCRIPT_DIR/keyword_matcher.py" -t "$input_file" -o "$output_file")
  if [[ -n "$KEYWORDS_FILE" ]]; then
    cmd+=(-k "$KEYWORDS_FILE")
  fi

  # Run the matcher; a failure is recorded and processing continues with the
  # next file (set -e does not apply to a command used as an if condition).
  if "${cmd[@]}"; then
    printf '%s\n' "${GREEN}  ✓ 完成: ${filename}_matched.xlsx${NC}"
    success=$((success + 1))
  else
    printf '%s\n' "${RED}  ✗ 失败: $filename${NC}"
    failed=$((failed + 1))
  fi
done

# Summary.
echo ""
echo "=============================================="
echo "处理完成"
echo "=============================================="
printf '%s\n' "总计: $total | ${GREEN}成功: $success${NC} | ${RED}失败: $failed${NC}"
echo "输出目录: $OUTPUT_DIR"