aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Trygve Laugstøl <trygvis@inamo.no> 2019-05-28 10:59:06 +0200
committer Trygve Laugstøl <trygvis@inamo.no> 2019-05-28 10:59:06 +0200
commit db6a0c66db74dc04b1adcc9b09fda3e1e81e3fcd (patch)
tree ab796761e79a0eb354cc4f4468816e1ddf8dccd8
parent 2e2956823c9cd02c766b296cbcbea9130bd07b36 (diff)
download ee-python-db6a0c66db74dc04b1adcc9b09fda3e1e81e3fcd.tar.gz
ee-python-db6a0c66db74dc04b1adcc9b09fda3e1e81e3fcd.tar.bz2
ee-python-db6a0c66db74dc04b1adcc9b09fda3e1e81e3fcd.tar.xz
ee-python-db6a0c66db74dc04b1adcc9b09fda3e1e81e3fcd.zip
part-apply-souffle-post: Sort the incoming CSV files before
processing them. This keeps the diffs between runs to a minimum.
-rw-r--r-- src/ee/tools/part_apply_souffle_post.py 20
1 files changed, 18 insertions, 2 deletions
diff --git a/src/ee/tools/part_apply_souffle_post.py b/src/ee/tools/part_apply_souffle_post.py
index 6cc5f36..819bd03 100644
--- a/src/ee/tools/part_apply_souffle_post.py
+++ b/src/ee/tools/part_apply_souffle_post.py
@@ -21,10 +21,26 @@ def work(in_path: Path, out_path: Path, work_dir: Path):
for part in in_parts:
out_parts.add_entry(part, False)
- mpn_csv = work_dir / "out" / "mpn.csv"
+ out_dir = work_dir / "out"
+
+ # Sort the output to keep changes in the file to a minimum when the rules change.
+ for file in out_dir.iterdir():
+ if file.name.endswith(".csv"):
+ with file.open("r") as f:
+ reader = csv.reader(f, dialect="excel-tab")
+ rows = sorted(reader)
+
+ tmp = Path(str(file) + ".tmp")
+ with tmp.open("w") as out:
+ writer = csv.writer(out, dialect="excel-tab")
+ writer.writerows(rows)
+
+ tmp.rename(file)
+
+ mpn_csv = out_dir / "mpn.csv"
new_facts = 0
- with (work_dir / "out" / "fact.csv").open("r") as f:
+ with (out_dir / "fact.csv").open("r") as f:
reader = csv.reader(f, dialect="excel-tab")
for uri, key, value in reader:
try: