# problem-bank/backend/services/import_queue_service.py
"""
Import queue: single-consumer FIFO worker and job execution.
Run run_worker_loop() in a background thread; on startup call reset_stale_running_jobs().
"""
import time
from datetime import datetime
from pathlib import Path

from sqlalchemy.orm import Session

from backend.database import SessionLocal
from backend.models import (
    JOB_STATUS_FAILED,
    JOB_STATUS_QUEUED,
    JOB_STATUS_RUNNING,
    JOB_STATUS_SUCCESS,
    ImportHistory,
    ImportJob,
    ImportJobItem,
    Question,
)
from backend.repositories import import_job_repo as repo
from backend.services.excel_service import parse_excel_file
from backend.services.parser import OpenAICompatibleParserService, extract_metadata


def _build_ai_rows(path: Path) -> list[dict]:
    """Parse one uploaded file with the AI parser and map it to Question rows."""
    parser = OpenAICompatibleParserService()
    metadata = extract_metadata(path.name)
    questions = parser.parse_file(str(path))
    rows = []
    for q in questions:
        # The parser returns Chinese-keyed dicts: 题干 = stem, 选项A-D =
        # options A-D, 正确答案 = answer, 解析 = explanation, 备注 = notes.
        rows.append(
            {
                "chapter": metadata["chapter"],
                "primary_knowledge": "",
                "secondary_knowledge": metadata["secondary_knowledge"],
                "question_type": metadata["question_type"],
                "difficulty": metadata["difficulty"],
                "stem": q.get("题干", ""),
                "option_a": q.get("选项A", ""),
                "option_b": q.get("选项B", ""),
                "option_c": q.get("选项C", ""),
                "option_d": q.get("选项D", ""),
                "answer": q.get("正确答案", ""),
                "explanation": q.get("解析", ""),
                "notes": q.get("备注", ""),
                "source_file": metadata["source_file"],
            }
        )
    return rows
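
# extract_metadata is assumed to derive the non-question fields from the
# filename alone (the actual naming convention lives in backend.services.parser).
# A purely hypothetical illustration of the shape _build_ai_rows relies on:
#
#     extract_metadata("第3章_二叉树_单选题_中等.docx")
#     # -> {"chapter": "...", "secondary_knowledge": "...",
#     #     "question_type": "...", "difficulty": "...",
#     #     "source_file": "第3章_二叉树_单选题_中等.docx"}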


def _process_one_item(
    db: Session,
    job: ImportJob,
    item: ImportJobItem,
    method: str,
) -> None:
    """Import one file, recording per-item status and an ImportHistory entry."""
    path = Path(item.stored_path)
    filename = item.filename

    # Publish progress before starting so pollers see the current file.
    job.current_file = filename
    job.current_index = item.seq
    job.updated_at = datetime.utcnow()
    item.status = JOB_STATUS_RUNNING
    item.started_at = datetime.utcnow()
    db.commit()

    try:
        if method == "excel":
            if path.suffix.lower() not in [".xlsx", ".xlsm", ".xltx", ".xltm"]:
                raise ValueError("仅支持 Excel 文件")  # "Only Excel files are supported"
            rows = parse_excel_file(path)
        else:
            rows = _build_ai_rows(path)
        questions = [Question(**row) for row in rows]
        if questions:
            db.add_all(questions)
        db.add(
            ImportHistory(
                filename=filename,
                method=method,
                question_count=len(questions),
                status="success",
            )
        )
        db.commit()
        item.status = JOB_STATUS_SUCCESS
        item.question_count = len(questions)
        item.ended_at = datetime.utcnow()
        job.success_count += 1
        job.processed += 1
        job.updated_at = datetime.utcnow()
        db.commit()
    except Exception as exc:
        # Discard the partial import, then record the failure separately.
        db.rollback()
        db.add(
            ImportHistory(
                filename=filename,
                method=method,
                question_count=0,
                status="failed",
            )
        )
        db.commit()
        item.status = JOB_STATUS_FAILED
        item.error = str(exc)
        item.ended_at = datetime.utcnow()
        job.failed_count += 1
        job.processed += 1
        job.updated_at = datetime.utcnow()
        db.commit()


def process_job(db: Session, job_id: int) -> None:
    """Execute a single job: process all items in order, then set the job's terminal status."""
    job = repo.get_job(db, job_id)
    if not job or job.status != JOB_STATUS_RUNNING:
        return
    method = job.method
    items = sorted(job.items, key=lambda x: x.seq)

    # Resume support: recompute counters from items already in a terminal state
    # so a job requeued after a crash does not double-count them.
    job.processed = sum(1 for it in items if it.status in (JOB_STATUS_SUCCESS, JOB_STATUS_FAILED))
    job.success_count = sum(1 for it in items if it.status == JOB_STATUS_SUCCESS)
    job.failed_count = sum(1 for it in items if it.status == JOB_STATUS_FAILED)
    db.commit()

    for item in items:
        if item.status in (JOB_STATUS_SUCCESS, JOB_STATUS_FAILED):
            continue  # already handled on a previous run
        _process_one_item(db, job, item, method)

    # Re-fetch to pick up the final counters before deciding the terminal status.
    job = repo.get_job(db, job_id)
    if not job:
        return
    if job.failed_count > 0 and job.success_count == 0:
        # Every file failed; partial failures still end as SUCCESS below.
        job.status = JOB_STATUS_FAILED
        job.error = "全部文件处理失败"  # "All files failed to process"
    else:
        job.status = JOB_STATUS_SUCCESS
        job.error = ""
    job.ended_at = datetime.utcnow()
    job.current_file = ""
    job.updated_at = datetime.utcnow()
    db.commit()


def reset_stale_running_jobs(db: Session) -> int:
    """On startup: set any job left in 'running' back to 'queued' so the worker can pick it up."""
    count = 0
    for job in db.query(ImportJob).filter(ImportJob.status == JOB_STATUS_RUNNING).all():
        job.status = JOB_STATUS_QUEUED
        count += 1
    if count:
        db.commit()
    return count
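

# The loop below relies on repo.claim_oldest_queued atomically flipping the
# oldest queued job to 'running' and committing, which keeps the queue safe
# even if a second worker is ever started by mistake. A sketch of that
# assumption (the real implementation lives in
# backend.repositories.import_job_repo; skip_locked needs a backend with row
# locks, e.g. PostgreSQL, and is a no-op on SQLite):
#
#     def claim_oldest_queued(db: Session) -> ImportJob | None:
#         job = (
#             db.query(ImportJob)
#             .filter(ImportJob.status == JOB_STATUS_QUEUED)
#             .order_by(ImportJob.id)
#             .with_for_update(skip_locked=True)
#             .first()
#         )
#         if job:
#             job.status = JOB_STATUS_RUNNING
#             job.updated_at = datetime.utcnow()
#             db.commit()
#         return job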


def run_worker_loop(interval_seconds: float = 1.0) -> None:
    """
    Single-consumer FIFO loop. Call from a background thread.
    Claims the oldest queued job, processes it, then repeats. Sleeps when no job is queued.
    """
    while True:
        db = SessionLocal()
        try:
            job = repo.claim_oldest_queued(db)
            if job:
                process_job(db, job.id)
            else:
                time.sleep(interval_seconds)
        except Exception:
            # Never let one bad job kill the worker thread; back off and retry.
            db.rollback()
            time.sleep(interval_seconds)
        finally:
            db.close()