diff options
author | Trygve Laugstøl <trygvis@inamo.no> | 2019-10-15 12:47:38 +0200 |
---|---|---|
committer | Trygve Laugstøl <trygvis@inamo.no> | 2019-10-15 13:37:49 +0200 |
commit | d801a8e3c61ba1a9e3effd85826f5d4d01f002e7 (patch) | |
tree | 5dc4ed6b469a0c615ddf318687c553760b0226c0 | |
parent | 0fe32987bedeafee23c6051cb9d9bb6024a559a8 (diff) | |
download | ee-python-d801a8e3c61ba1a9e3effd85826f5d4d01f002e7.tar.gz ee-python-d801a8e3c61ba1a9e3effd85826f5d4d01f002e7.tar.bz2 ee-python-d801a8e3c61ba1a9e3effd85826f5d4d01f002e7.tar.xz ee-python-d801a8e3c61ba1a9e3effd85826f5d4d01f002e7.zip |
souffle: Better quoting for data with double quote signs (").
-rw-r--r-- | src/ee/tools/part_apply_souffle_post.py | 10 | ||||
-rw-r--r-- | src/ee/tools/part_apply_souffle_pre.py | 13 |
2 files changed, 18 insertions, 5 deletions
diff --git a/src/ee/tools/part_apply_souffle_post.py b/src/ee/tools/part_apply_souffle_post.py
index 9322ccb..8630603 100644
--- a/src/ee/tools/part_apply_souffle_post.py
+++ b/src/ee/tools/part_apply_souffle_post.py
@@ -10,6 +10,8 @@ from ee.part import Part, load_db, save_db, PartDb
 
 
 def work(in_path: Path, out_path: Path, work_dir: Path):
+    dialect = "excel-tab"
+
     in_parts: ObjDb[Part] = ObjDb[Part]()
     uri_idx = in_parts.add_unique_index("uri", lambda p: p.uri)
 
@@ -27,12 +29,12 @@ def work(in_path: Path, out_path: Path, work_dir: Path):
     for file in out_dir.iterdir():
         if file.name.endswith(".csv"):
             with file.open("r") as f:
-                reader = csv.reader(f, dialect="excel-tab")
+                reader = csv.reader(f, dialect=dialect)
                 rows = sorted(reader)
 
             tmp = Path(str(file) + ".tmp")
             with tmp.open("w") as out:
-                writer = csv.writer(out, dialect="excel-tab")
+                writer = csv.writer(out, dialect=dialect)
                 writer.writerows(rows)
 
             tmp.rename(file)
@@ -41,7 +43,7 @@ def work(in_path: Path, out_path: Path, work_dir: Path):
     new_facts = 0
 
     with (out_dir / "add_fact.csv").open("r") as f:
-        reader = csv.reader(f, dialect="excel-tab")
+        reader = csv.reader(f, dialect=dialect)
         for uri, key, value in reader:
             try:
                 part = uri_idx.get_single(uri)
@@ -52,7 +54,7 @@ def work(in_path: Path, out_path: Path, work_dir: Path):
 
     if mpn_csv.is_file():
         with mpn_csv.open("r") as f:
-            reader = csv.reader(f, dialect="excel-tab")
+            reader = csv.reader(f, dialect=dialect)
             for uri, mpn in reader:
                 try:
                     part = uri_idx.get_single(uri)
diff --git a/src/ee/tools/part_apply_souffle_pre.py b/src/ee/tools/part_apply_souffle_pre.py
index 790ffa2..17dff44 100644
--- a/src/ee/tools/part_apply_souffle_pre.py
+++ b/src/ee/tools/part_apply_souffle_pre.py
@@ -8,8 +8,19 @@ from ee.part import Part, load_db
 from ee.tools import mk_dirs
 
 
+class SouffleDialect(csv.Dialect):
+    """Describe the usual properties of Excel-generated CSV files."""
+    delimiter = "\t"
+    quotechar = '¤'
+    doublequote = True
+    skipinitialspace = False
+    lineterminator = '\r\n'
+    quoting = csv.QUOTE_MINIMAL
+    # quoting = csv.QUOTE_NONE
+
+
 def write_csvs(print_header, in_parts, part_f, part_pn_f, fact_f):
-    dialect = "excel-tab"
+    dialect = SouffleDialect
     part_csv = csv.writer(part_f, dialect=dialect)
     if print_header:
         part_csv.writerow(["uri", "ref"])