From d801a8e3c61ba1a9e3effd85826f5d4d01f002e7 Mon Sep 17 00:00:00 2001 From: Trygve Laugstøl Date: Tue, 15 Oct 2019 12:47:38 +0200 Subject: souffle: Better quoting for data with double quote signs ("). --- src/ee/tools/part_apply_souffle_post.py | 10 ++++++---- src/ee/tools/part_apply_souffle_pre.py | 13 ++++++++++++- 2 files changed, 18 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/ee/tools/part_apply_souffle_post.py b/src/ee/tools/part_apply_souffle_post.py index 9322ccb..8630603 100644 --- a/src/ee/tools/part_apply_souffle_post.py +++ b/src/ee/tools/part_apply_souffle_post.py @@ -10,6 +10,8 @@ from ee.part import Part, load_db, save_db, PartDb def work(in_path: Path, out_path: Path, work_dir: Path): + dialect = "excel-tab" + in_parts: ObjDb[Part] = ObjDb[Part]() uri_idx = in_parts.add_unique_index("uri", lambda p: p.uri) @@ -27,12 +29,12 @@ def work(in_path: Path, out_path: Path, work_dir: Path): for file in out_dir.iterdir(): if file.name.endswith(".csv"): with file.open("r") as f: - reader = csv.reader(f, dialect="excel-tab") + reader = csv.reader(f, dialect=dialect) rows = sorted(reader) tmp = Path(str(file) + ".tmp") with tmp.open("w") as out: - writer = csv.writer(out, dialect="excel-tab") + writer = csv.writer(out, dialect=dialect) writer.writerows(rows) tmp.rename(file) @@ -41,7 +43,7 @@ def work(in_path: Path, out_path: Path, work_dir: Path): new_facts = 0 with (out_dir / "add_fact.csv").open("r") as f: - reader = csv.reader(f, dialect="excel-tab") + reader = csv.reader(f, dialect=dialect) for uri, key, value in reader: try: part = uri_idx.get_single(uri) @@ -52,7 +54,7 @@ def work(in_path: Path, out_path: Path, work_dir: Path): if mpn_csv.is_file(): with mpn_csv.open("r") as f: - reader = csv.reader(f, dialect="excel-tab") + reader = csv.reader(f, dialect=dialect) for uri, mpn in reader: try: part = uri_idx.get_single(uri) diff --git a/src/ee/tools/part_apply_souffle_pre.py b/src/ee/tools/part_apply_souffle_pre.py index 790ffa2..17dff44 100644 --- a/src/ee/tools/part_apply_souffle_pre.py +++ b/src/ee/tools/part_apply_souffle_pre.py @@ -8,8 +8,19 @@ from ee.part import Part, load_db from ee.tools import mk_dirs +class SouffleDialect(csv.Dialect): + """Describe the usual properties of Excel-generated CSV files.""" + delimiter = "\t" + quotechar = '¤' + doublequote = True + skipinitialspace = False + lineterminator = '\r\n' + quoting = csv.QUOTE_MINIMAL + # quoting = csv.QUOTE_NONE + + def write_csvs(print_header, in_parts, part_f, part_pn_f, fact_f): - dialect = "excel-tab" + dialect = SouffleDialect part_csv = csv.writer(part_f, dialect=dialect) if print_header: part_csv.writerow(["uri", "ref"]) -- cgit v1.2.3