import argparse
import os

from pathlib import Path

import awkward as ak
from pyarrow import parquet as pq

import uproot

import duckdb as ddb


def parquet_stem(key_name):
    """Return the parquet file stem used for a ROOT key name.

    Directory separators ("/") become "_". A ";1" cycle suffix on a path
    segment is dropped; any other ";" is turned into "_", so "tree;2"
    becomes "tree_2".

    Only a ";1" that ends a path segment is treated as a cycle suffix.
    A plain ``replace(";1", "")`` would also hit multi-digit cycles and
    turn "tree;10" into "tree0".
    """
    segments = (
        segment[:-2] if segment.endswith(";1") else segment
        for segment in key_name.split("/")
    )
    return "_".join(segments).replace(";", "_")


def sql_identifier(name):
    """Return *name* as a double-quoted SQL identifier ('"' is doubled)."""
    return '"' + name.replace('"', '""') + '"'


def sql_string(value):
    """Return *value* as a single-quoted SQL string literal ("'" is doubled)."""
    return "'" + value.replace("'", "''") + "'"


def convert_trees(root_fp, out_dir):
    """Write every readable object of *root_fp* to ``out_dir/<stem>.parquet``.

    Conversion is best effort: a key that cannot be read as arrays or
    written as a table is reported on stdout and skipped, so one bad key
    does not abort the whole run.
    """
    for key_name in root_fp.keys():
        try:
            print("Converting ", key_name)
            c_tree = root_fp[key_name]
            c_awk = c_tree.arrays(library="ak")
            c_table = ak.to_arrow_table(c_awk, explode_records=True)
            pq.write_table(
                c_table, str(out_dir / f"{parquet_stem(key_name)}.parquet")
            )
        except Exception as e:
            print("=========================")
            print(e)
            print("Failed!")
            print("=========================")


def build_database(db_path, parquet_dir):
    """Load every ``*.parquet`` file in *parquet_dir* into the DuckDB file.

    Each file becomes one table named after the file stem. The connection
    is closed even if a statement fails.
    """
    con = ddb.connect(database=db_path)
    try:
        for parquet_file in parquet_dir.glob("*.parquet"):
            # Use the real path and the full stem: rebuilding the path from
            # a name cut at the first "." points at a file that does not
            # exist whenever the stem itself contains a dot.
            table = sql_identifier(parquet_file.stem)
            source = sql_string(parquet_file.as_posix())
            # OR REPLACE keeps reruns working: the parquet files are
            # overwritten on a rerun, so the tables are refreshed as well.
            con.execute(
                f"CREATE OR REPLACE TABLE {table} AS "
                f"SELECT * FROM read_parquet({source})"
            )
    finally:
        con.close()


def main(argv=None):
    """Convert a ROOT file to parquet files and a DuckDB database.

    For an input ``<base>.<ext>`` the parquet files go to the directory
    ``<base>/`` and the database to ``<base>.duckdb``.
    """
    parser = argparse.ArgumentParser(
        description="Convert a ROOT file to a parquet file"
    )
    parser.add_argument("filename", help="input root filename")
    args = parser.parse_args(argv)

    src = Path(args.filename)
    if not src.suffix:
        # Without an extension the output directory would have the same
        # path as the input file itself.
        parser.error(f"input filename has no extension: {args.filename}")
    # Strip only the final component's extension; splitting the raw string
    # on "." would also cut at dots in directory names.
    r_path = src.with_suffix("")
    r_path.mkdir(parents=True, exist_ok=True)

    with uproot.open(args.filename, object_cache=None) as root_fp:
        convert_trees(root_fp, r_path)

    print("")
    print("Building DuckDB Database")
    print("")
    build_database(f"{r_path}.duckdb", r_path)


if __name__ == "__main__":
    main()

# By admin
#
# Leave a Reply
#
# Your email address will not be published.