"""Export a part database as tab-separated fact files plus a ``facts.dl`` declaration file."""
import argparse
import csv
from pathlib import Path

import ee.tools
from ee import tools
from ee.logging import log
from ee.part import Part, PartDb, load_db
from ee.tools import mk_dirs


class SouffleDialect(csv.Dialect):
    """CSV dialect for the ``*.facts`` files written by this module.

    Fields are tab-separated and quoted only when the csv module deems it
    necessary (``QUOTE_MINIMAL``).
    """
    delimiter = "\t"
    # Currency sign (U+00A4). The csv module requires a single-character
    # quotechar; the previous two-character value was this character's UTF-8
    # bytes decoded with the wrong code page, which made csv.writer() reject
    # the dialect. Written as an escape so it survives re-encoding of the file.
    # Presumably chosen because it is unlikely to occur in the data.
    quotechar = "\u00a4"
    doublequote = True
    skipinitialspace = False
    lineterminator = '\r\n'
    quoting = csv.QUOTE_MINIMAL


_dialect = SouffleDialect


def _new_writer(out_f, print_header, header):
    """Return a csv writer on ``out_f`` using ``_dialect``, writing ``header`` first if requested."""
    writer = csv.writer(out_f, dialect=_dialect)
    if print_header:
        writer.writerow(header)
    return writer


def write_csvs(print_header, in_parts: PartDb, part_f, part_pn_f, fact_f, part_main_category_f):
    """Write the per-part relations for every part in ``in_parts``.

    :param print_header: when true, each file starts with a row of column names.
    :param in_parts: part database; iterated with ``iterparts()``.
    :param part_f: receives one ``(uri, ref)`` row per part.
    :param part_pn_f: receives ``(part_uri, spn, mpn)`` for parts having an SPN or an MPN.
    :param fact_f: receives one ``(part_uri, key, value)`` row per fact.
    :param part_main_category_f: receives ``(part_uri, category_uri)`` for parts with a category.
    :return: tuple ``(part_count, fact_count)`` of parts and facts written.
    """
    part_csv = _new_writer(part_f, print_header, ["uri", "ref"])
    part_pn_csv = _new_writer(part_pn_f, print_header, ["part_uri", "spn", "mpn"])
    fact_csv = _new_writer(fact_f, print_header, ["part_uri", "key", "value"])
    main_category_csv = _new_writer(part_main_category_f, print_header, ["part_uri", "category_uri"])

    part_count = 0
    fact_count = 0

    for xml in in_parts.iterparts():
        p = Part(xml)

        sch_ref = p.get_only_schematic_reference()
        part_csv.writerow([p.uri, sch_ref.referenceProp if sch_ref else None])

        # A part number row is only emitted when at least one of the two is known.
        spn = p.get_only_spn()
        mpn = p.get_only_mpn()
        if spn is not None or mpn is not None:
            part_pn_csv.writerow([p.uri, spn.valueProp if spn else None, mpn.valueProp if mpn else None])

        part_count += 1

        for fact in p.get_facts():
            fact_csv.writerow([p.uri, fact.keyProp, fact.valueProp])
            fact_count += 1

        if p.xml.categoryProp:
            main_category_csv.writerow([p.uri, p.xml.categoryProp])

    return part_count, fact_count


def write_category(print_header, in_parts: PartDb, category_f):
    """Write one ``(uri, name, parent, href)`` row per entry of ``in_parts.categories``.

    The parent column holds the parent's uri, or is empty for categories
    without a parent.
    """
    category_csv = _new_writer(category_f, print_header, ["uri", "name", "parent", "href"])

    for c in in_parts.categories:
        category_csv.writerow([c.uri, c.name, c.parent.uri if c.parent else None, c.href])


def work(in_path: Path, work_dir: Path):
    """Load the part database at ``in_path`` and write the fact files below ``work_dir``.

    Produces ``work_dir/facts.dl`` with the relation declarations and one
    ``work_dir/in/<relation>.facts`` file per declared relation.
    """
    in_parts = load_db(in_path)

    # Fact files are written without a header row.
    print_header = False

    tools.mk_dirs(work_dir)
    with (work_dir / "facts.dl").open("w") as f:
        print("""
.decl part(uri:symbol, ref:symbol)
.input part
.decl fact(part_uri:symbol, key:symbol, value:symbol)
.input fact
.decl part_pn(part_uri:symbol, spn:symbol, mpn:symbol)
.input part_pn
.decl part_main_category(part_uri:symbol, category_uri:symbol)
.input part_main_category
.decl category(category_uri:symbol, name:symbol, parent:symbol, href:symbol)
.input category
""".strip(), file=f)

    in_dir = work_dir / "in"
    mk_dirs(in_dir)

    # newline="" as required by the csv module: the writer emits its own line
    # terminator and the file object must not translate it a second time.
    with (in_dir / "part.facts").open("w", newline="") as part_f, \
            (in_dir / "part_pn.facts").open("w", newline="") as part_pn_f, \
            (in_dir / "fact.facts").open("w", newline="") as fact_f, \
            (in_dir / "part_main_category.facts").open("w", newline="") as part_main_category_f:
        part_count, fact_count = write_csvs(print_header, in_parts, part_f, part_pn_f, fact_f,
                                            part_main_category_f)

    with (in_dir / "category.facts").open("w", newline="") as category_f:
        write_category(print_header, in_parts, category_f)

    log.info("Loaded clauses: {} parts, {} facts".format(part_count, fact_count))


# NOTE(review): the command line is parsed and work() is run at import time.
# Left unguarded on purpose, since how this module is invoked is not visible
# from this file - confirm before adding an `if __name__ == "__main__"` guard.
parser = argparse.ArgumentParser()
ee.tools.add_default_argparse_group(parser)

parser.add_argument("--in",
                    dest="in_path",
                    required=True,
                    metavar="PART DB")

parser.add_argument("--work",
                    required=True,
                    metavar="DIR")

args = parser.parse_args()
ee.tools.process_default_argparse_group(args)

work(Path(args.in_path), Path(args.work))