From c368b1d4b7d1dfe148ba93ffc1c0de7122c4e66b Mon Sep 17 00:00:00 2001 From: Trygve Laugstøl Date: Wed, 16 Oct 2019 08:48:20 +0200 Subject: souffle: Adding support for categories. --- src/ee/tools/part_apply_souffle_pre.py | 46 ++++++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 8 deletions(-) (limited to 'src/ee/tools') diff --git a/src/ee/tools/part_apply_souffle_pre.py b/src/ee/tools/part_apply_souffle_pre.py index 17dff44..b228d6f 100644 --- a/src/ee/tools/part_apply_souffle_pre.py +++ b/src/ee/tools/part_apply_souffle_pre.py @@ -4,7 +4,8 @@ from pathlib import Path import ee.tools from ee import tools -from ee.part import Part, load_db +from ee.logging import log +from ee.part import Part, PartDb, load_db from ee.tools import mk_dirs @@ -19,20 +20,26 @@ class SouffleDialect(csv.Dialect): # quoting = csv.QUOTE_NONE -def write_csvs(print_header, in_parts, part_f, part_pn_f, fact_f): - dialect = SouffleDialect - part_csv = csv.writer(part_f, dialect=dialect) +_dialect = SouffleDialect + + +def write_csvs(print_header, in_parts: PartDb, part_f, part_pn_f, fact_f, part_main_category_f): + part_csv = csv.writer(part_f, dialect=_dialect) if print_header: part_csv.writerow(["uri", "ref"]) - part_pn_csv = csv.writer(part_pn_f, dialect=dialect) + part_pn_csv = csv.writer(part_pn_f, dialect=_dialect) if print_header: part_pn_csv.writerow(["part_uri", "spn", "mpn"]) - fact_csv = csv.writer(fact_f, dialect=dialect) + fact_csv = csv.writer(fact_f, dialect=_dialect) if print_header: fact_csv.writerow(["part_uri", "key", "value"]) + main_category_csv = csv.writer(part_main_category_f, dialect=_dialect) + if print_header: + main_category_csv.writerow(["part_uri", "category_uri"]) + part_count = 0 fact_count = 0 @@ -52,9 +59,21 @@ def write_csvs(print_header, in_parts, part_f, part_pn_f, fact_f): fact_csv.writerow([p.uri, fact.keyProp, fact.valueProp]) fact_count += 1 + if p.xml.categoryProp: + main_category_csv.writerow([p.uri, p.xml.categoryProp]) + return part_count, fact_count +def write_category(print_header, in_parts: PartDb, category_f): + category_csv = csv.writer(category_f, dialect=_dialect) + if print_header: + category_csv.writerow(["uri", "name", "parent", "href"]) + + for c in in_parts.categories: + category_csv.writerow([c.uri, c.name, c.parent.uri if c.parent else None, c.href]) + + def work(in_path: Path, work_dir: Path): in_parts = load_db(in_path) @@ -71,6 +90,12 @@ def work(in_path: Path, work_dir: Path): .decl part_pn(part_uri:symbol, spn:symbol, mpn:symbol) .input part_pn + +.decl part_main_category(part_uri:symbol, category_uri:symbol) +.input part_main_category + +.decl category(category_uri:symbol, name:symbol, parent:symbol, href:symbol) +.input category """.strip(), file=f) in_dir = work_dir / "in" @@ -78,9 +103,14 @@ def work(in_path: Path, work_dir: Path): with (in_dir / "part.facts").open("w") as part_f: with (in_dir / "part_pn.facts").open("w") as part_pn_f: with (in_dir / "fact.facts").open("w") as fact_f: - part_count, fact_count = write_csvs(print_header, in_parts, part_f, part_pn_f, fact_f) + with (in_dir / "part_main_category.facts").open("w") as part_main_category_f: + part_count, fact_count = write_csvs(print_header, in_parts, part_f, part_pn_f, fact_f, + part_main_category_f) + + with (in_dir / "category.facts").open("w") as category_f: + write_category(print_header, in_parts, category_f) - print("Loaded clauses: {} parts, {} facts".format(part_count, fact_count)) + log.info("Loaded clauses: {} parts, {} facts".format(part_count, fact_count)) parser = argparse.ArgumentParser() -- cgit v1.2.3