# 2026_mcm_b/rocket_launch_to_csv.py

#!/usr/bin/env python3
"""将 rocket launch counts.txt 数据导出为 CSV 文件"""

import csv
import re


def parse_header(header_line: str) -> list[str]:
    """Parse the column names from a header line that begins with ``#``.

    Args:
        header_line: The raw header line, e.g. ``"# Bin  YDate  ValMin"``.

    Returns:
        The column names in order. An empty list is returned when the line
        contains nothing but the comment marker and whitespace.
    """
    # Drop the leading comment marker(s). A bare split() then treats any run
    # of whitespace (one space, several spaces, tabs) as a single separator,
    # which is the same rule parse_data_line applies to data rows, so header
    # and row column counts stay aligned.
    return header_line.lstrip("#").split()


def parse_data_line(line: str) -> list[str]:
    """Split a data line into its whitespace-separated fields.

    Any run of whitespace counts as one separator and leading/trailing
    whitespace is ignored, so a blank line yields an empty list.
    """
    fields = line.split()
    return fields


def main(
    input_file: str = "rocket launch counts.txt",
    output_file: str = "rocket_launch_counts.csv",
) -> None:
    """Convert the whitespace-delimited launch-count table into a CSV file.

    Lines starting with ``#`` are treated as comments; the first one that
    mentions both ``Bin`` and ``YDate`` is parsed as the column header.
    Every other non-blank line is split on whitespace and kept as a data row
    when it has at least four fields.

    Args:
        input_file: Path of the text table to read (UTF-8).
        output_file: Path of the CSV file to write (UTF-8).
    """
    headers = None
    rows = []

    with open(input_file, "r", encoding="utf-8") as f:
        for line in f:
            line = line.rstrip("\n")
            if not line.strip():
                continue
            if line.startswith("#"):
                # Only the column-name line is of interest; other comment
                # lines (such as a separator rule) are skipped.
                if headers is None and "Bin" in line and "YDate" in line:
                    headers = parse_header(line)
                continue
            # Data row: require at least Bin, YDate, ValMin and ValMax.
            values = parse_data_line(line)
            if len(values) >= 4:
                rows.append(values)

    # Fall back to the built-in column names when no header was found, or when
    # the parsed header does not line up with the first data row. The check is
    # parenthesised explicitly: a parsed header is kept when there are no rows
    # to compare it against, instead of being discarded unconditionally.
    if headers is None or (rows and len(headers) != len(rows[0])):
        headers = [
            "Bin", "YDate", "ValMin", "ValMax",
            "US", "SU", "RU", "CN", "F", "J", "AU", "D", "UK", "I",
            "I-ELDO", "IN", "BR", "KR", "I-ESA", "KP", "IR", "IL", "CYM", "NZ",
            "Total",
        ]

    # newline="" lets the csv module control line endings itself.
    with open(output_file, "w", encoding="utf-8", newline="") as out:
        writer = csv.writer(out)
        writer.writerow(headers)
        writer.writerows(rows)

    print(f"已导出 {len(rows)} 行数据到 {output_file}")


# Run the export only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()