From bab23db8bb13832ea326af5e1a847640ccb04cce Mon Sep 17 00:00:00 2001 From: Trygve Laugstøl Date: Mon, 12 Aug 2019 14:43:54 +0200 Subject: souffle: Writing out SPN and MPN for each part. --- src/ee/tools/__init__.py | 1 + src/ee/tools/part_apply_souffle_pre.py | 73 ++++++++++++++++++++++------------ 2 files changed, 49 insertions(+), 25 deletions(-) diff --git a/src/ee/tools/__init__.py b/src/ee/tools/__init__.py index d6d1298..1c4e769 100644 --- a/src/ee/tools/__init__.py +++ b/src/ee/tools/__init__.py @@ -8,6 +8,7 @@ from ee.logging import log __all__ = [ "mk_parents", + "mk_dirs", "log" # for compatibility, should be removed ] diff --git a/src/ee/tools/part_apply_souffle_pre.py b/src/ee/tools/part_apply_souffle_pre.py index 3df9eda..790ffa2 100644 --- a/src/ee/tools/part_apply_souffle_pre.py +++ b/src/ee/tools/part_apply_souffle_pre.py @@ -8,6 +8,42 @@ from ee.part import Part, load_db from ee.tools import mk_dirs +def write_csvs(print_header, in_parts, part_f, part_pn_f, fact_f): + dialect = "excel-tab" + part_csv = csv.writer(part_f, dialect=dialect) + if print_header: + part_csv.writerow(["uri", "ref"]) + + part_pn_csv = csv.writer(part_pn_f, dialect=dialect) + if print_header: + part_pn_csv.writerow(["part_uri", "spn", "mpn"]) + + fact_csv = csv.writer(fact_f, dialect=dialect) + if print_header: + fact_csv.writerow(["part_uri", "key", "value"]) + + part_count = 0 + fact_count = 0 + + for xml in in_parts.iterparts(): + p = Part(xml) + sch_ref = p.get_only_schematic_reference() + part_csv.writerow([p.uri, sch_ref.referenceProp if sch_ref else None]) + + spn = p.get_only_spn() + mpn = p.get_only_mpn() + if spn is not None or mpn is not None: + part_pn_csv.writerow([p.uri, spn.valueProp if spn else None, mpn.valueProp if mpn else None]) + + part_count += 1 + + for fact in p.get_facts(): + fact_csv.writerow([p.uri, fact.keyProp, fact.valueProp]) + fact_count += 1 + + return part_count, fact_count + + def work(in_path: Path, work_dir: Path): in_parts = load_db(in_path) @@ -15,36 +51,23 @@ def work(in_path: Path, work_dir: Path): tools.mk_dirs(work_dir) with (work_dir / "facts.dl").open("w") as f: - print(".decl part(uri:symbol, ref:symbol)", file=f) - print(".input part", file=f) - print("", file=f) - print(".decl fact(part_uri:symbol, key:symbol, value:symbol)", file=f) - print(".input fact", file=f) + print(""" +.decl part(uri:symbol, ref:symbol) +.input part - part_count = 0 - fact_count = 0 +.decl fact(part_uri:symbol, key:symbol, value:symbol) +.input fact + +.decl part_pn(part_uri:symbol, spn:symbol, mpn:symbol) +.input part_pn +""".strip(), file=f) in_dir = work_dir / "in" mk_dirs(in_dir) with (in_dir / "part.facts").open("w") as part_f: - with (in_dir / "fact.facts").open("w") as fact_f: - part_csv = csv.writer(part_f, dialect="excel-tab") - if print_header: - part_csv.writerow(["uri", "ref"]) - - fact_csv = csv.writer(fact_f, dialect="excel-tab") - if print_header: - fact_csv.writerow(["part_uri", "key", "value"]) - - for xml in in_parts.iterparts(): - p = Part(xml) - sch_ref = p.get_only_schematic_reference() - part_csv.writerow([p.uri, sch_ref.referenceProp if sch_ref else None]) - part_count += 1 - - for fact in p.get_facts(): - fact_csv.writerow([p.uri, fact.keyProp, fact.valueProp]) - fact_count += 1 + with (in_dir / "part_pn.facts").open("w") as part_pn_f: + with (in_dir / "fact.facts").open("w") as fact_f: + part_count, fact_count = write_csvs(print_header, in_parts, part_f, part_pn_f, fact_f) print("Loaded clauses: {} parts, {} facts".format(part_count, fact_count)) -- cgit v1.2.3