import argparse
import os
from pathlib import Path
import awkward as ak
from pyarrow import parquet as pq
import uproot
import duckdb as ddb
def output_dir_for(filename):
    """Return the output directory path for *filename*.

    The directory is the input path with only its final extension removed,
    e.g. ``data/sample.root`` -> ``data/sample``. Dots that appear in parent
    directory names are left alone.
    """
    return Path(filename).with_suffix("")


def table_name_for_key(key_name):
    """Turn a ROOT key such as ``dir/tree;1`` into a flat file/table stem.

    Path separators become underscores. A cycle suffix of exactly ``;1`` is
    dropped; any other cycle is kept as ``_<cycle>`` so that ``tree;12`` maps
    to ``tree_12`` rather than being mangled into ``tree2``.
    """
    parts = []
    for component in key_name.split("/"):
        name, sep, cycle = component.rpartition(";")
        if sep and cycle == "1":
            component = name
        parts.append(component.replace(";", "_"))
    return "_".join(parts)


def quote_identifier(name):
    """Return *name* as a double-quoted SQL identifier, escaping quotes."""
    return '"' + name.replace('"', '""') + '"'


def quote_literal(text):
    """Return *text* as a single-quoted SQL string literal, escaping quotes."""
    return "'" + text.replace("'", "''") + "'"


def main(argv=None):
    """Convert every readable object of a ROOT file to parquet, then load
    all parquet files of the output directory into a DuckDB database.

    Args:
        argv: optional argument list; defaults to ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(
        description="Convert a ROOT file to a parquet file"
    )
    parser.add_argument("filename", help="input root filename")
    args = parser.parse_args(argv)

    r_path = output_dir_for(args.filename)
    if r_path.exists() and not r_path.is_dir():
        # Happens e.g. when the input file has no extension: the output
        # directory would have the same path as the input file itself.
        parser.error(f"output directory {r_path} clashes with an existing file")
    r_path.mkdir(parents=True, exist_ok=True)

    with uproot.open(args.filename, object_cache=None) as root_fp:
        for key_name in root_fp.keys():
            # Best effort: keys that cannot be read as arrays (directories,
            # histograms, ...) are reported and skipped, not fatal.
            try:
                print("Converting ", key_name)
                c_name = table_name_for_key(key_name)
                c_awk = root_fp[key_name].arrays(library="ak")
                c_table = ak.to_arrow_table(c_awk, explode_records=True)
                pq.write_table(c_table, str(r_path / f"{c_name}.parquet"))
            except Exception as e:
                print("=========================")
                print(e)
                print("Failed!")
                print("=========================")

    print("")
    print("Building DuckDB Database")
    print("")

    con = ddb.connect(database=f"{r_path}.duckdb")
    try:
        # Sorted so tables are created in a reproducible order.
        for ff in sorted(r_path.glob("*.parquet")):
            # Use the real stem and the real path of the globbed file, and
            # quote both, so names containing dots, dashes or quotes work.
            table = quote_identifier(ff.stem)
            source = quote_literal(ff.as_posix())
            # OR REPLACE keeps re-runs against an existing database working,
            # matching the overwrite semantics of the parquet files.
            con.execute(
                f"CREATE OR REPLACE TABLE {table} AS "
                f"SELECT * FROM read_parquet({source})"
            )
    finally:
        # Always release the database file, even if a table fails to load.
        con.close()


if __name__ == "__main__":
    main()