From db6a0c66db74dc04b1adcc9b09fda3e1e81e3fcd Mon Sep 17 00:00:00 2001 From: Trygve Laugstøl Date: Tue, 28 May 2019 10:59:06 +0200 Subject: part-apply-souffle-post: Sorting the incoming CSV files before processing them. Keeps the diffs between runs to a minimum. --- src/ee/tools/part_apply_souffle_post.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'src/ee') diff --git a/src/ee/tools/part_apply_souffle_post.py b/src/ee/tools/part_apply_souffle_post.py index 6cc5f36..819bd03 100644 --- a/src/ee/tools/part_apply_souffle_post.py +++ b/src/ee/tools/part_apply_souffle_post.py @@ -21,10 +21,26 @@ def work(in_path: Path, out_path: Path, work_dir: Path): for part in in_parts: out_parts.add_entry(part, False) - mpn_csv = work_dir / "out" / "mpn.csv" + out_dir = work_dir / "out" + + # Sort the output to keep changes in the file to a minimum when the rules change. + for file in out_dir.iterdir(): + if file.name.endswith(".csv"): + with file.open("r") as f: + reader = csv.reader(f, dialect="excel-tab") + rows = sorted(reader) + + tmp = Path(str(file) + ".tmp") + with tmp.open("w") as out: + writer = csv.writer(out, dialect="excel-tab") + writer.writerows(rows) + + tmp.rename(file) + + mpn_csv = out_dir / "mpn.csv" new_facts = 0 - with (work_dir / "out" / "fact.csv").open("r") as f: + with (out_dir / "fact.csv").open("r") as f: reader = csv.reader(f, dialect="excel-tab") for uri, key, value in reader: try: -- cgit v1.2.3