aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Trygve Laugstøl <trygvis@inamo.no> 2019-10-15 12:47:38 +0200
committer Trygve Laugstøl <trygvis@inamo.no> 2019-10-15 13:37:49 +0200
commit d801a8e3c61ba1a9e3effd85826f5d4d01f002e7 (patch)
tree 5dc4ed6b469a0c615ddf318687c553760b0226c0
parent 0fe32987bedeafee23c6051cb9d9bb6024a559a8 (diff)
download ee-python-d801a8e3c61ba1a9e3effd85826f5d4d01f002e7.tar.gz
ee-python-d801a8e3c61ba1a9e3effd85826f5d4d01f002e7.tar.bz2
ee-python-d801a8e3c61ba1a9e3effd85826f5d4d01f002e7.tar.xz
ee-python-d801a8e3c61ba1a9e3effd85826f5d4d01f002e7.zip
souffle: Better quoting for data with double quote signs (").
-rw-r--r-- src/ee/tools/part_apply_souffle_post.py 10
-rw-r--r-- src/ee/tools/part_apply_souffle_pre.py 13
2 files changed, 18 insertions, 5 deletions
diff --git a/src/ee/tools/part_apply_souffle_post.py b/src/ee/tools/part_apply_souffle_post.py
index 9322ccb..8630603 100644
--- a/src/ee/tools/part_apply_souffle_post.py
+++ b/src/ee/tools/part_apply_souffle_post.py
@@ -10,6 +10,8 @@ from ee.part import Part, load_db, save_db, PartDb
def work(in_path: Path, out_path: Path, work_dir: Path):
+ dialect = "excel-tab"
+
in_parts: ObjDb[Part] = ObjDb[Part]()
uri_idx = in_parts.add_unique_index("uri", lambda p: p.uri)
@@ -27,12 +29,12 @@ def work(in_path: Path, out_path: Path, work_dir: Path):
for file in out_dir.iterdir():
if file.name.endswith(".csv"):
with file.open("r") as f:
- reader = csv.reader(f, dialect="excel-tab")
+ reader = csv.reader(f, dialect=dialect)
rows = sorted(reader)
tmp = Path(str(file) + ".tmp")
with tmp.open("w") as out:
- writer = csv.writer(out, dialect="excel-tab")
+ writer = csv.writer(out, dialect=dialect)
writer.writerows(rows)
tmp.rename(file)
@@ -41,7 +43,7 @@ def work(in_path: Path, out_path: Path, work_dir: Path):
new_facts = 0
with (out_dir / "add_fact.csv").open("r") as f:
- reader = csv.reader(f, dialect="excel-tab")
+ reader = csv.reader(f, dialect=dialect)
for uri, key, value in reader:
try:
part = uri_idx.get_single(uri)
@@ -52,7 +54,7 @@ def work(in_path: Path, out_path: Path, work_dir: Path):
if mpn_csv.is_file():
with mpn_csv.open("r") as f:
- reader = csv.reader(f, dialect="excel-tab")
+ reader = csv.reader(f, dialect=dialect)
for uri, mpn in reader:
try:
part = uri_idx.get_single(uri)
diff --git a/src/ee/tools/part_apply_souffle_pre.py b/src/ee/tools/part_apply_souffle_pre.py
index 790ffa2..17dff44 100644
--- a/src/ee/tools/part_apply_souffle_pre.py
+++ b/src/ee/tools/part_apply_souffle_pre.py
@@ -8,8 +8,19 @@ from ee.part import Part, load_db
from ee.tools import mk_dirs
+class SouffleDialect(csv.Dialect):
+ """Tab-separated CSV dialect that quotes with '¤', so double quotes (") in the data are written unchanged."""
+ delimiter = "\t"
+ quotechar = '¤'
+ doublequote = True
+ skipinitialspace = False
+ lineterminator = '\r\n'
+ quoting = csv.QUOTE_MINIMAL
+ # quoting = csv.QUOTE_NONE
+
+
def write_csvs(print_header, in_parts, part_f, part_pn_f, fact_f):
- dialect = "excel-tab"
+ dialect = SouffleDialect
part_csv = csv.writer(part_f, dialect=dialect)
if print_header:
part_csv.writerow(["uri", "ref"])