aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTrygve Laugstøl <trygvis@inamo.no>2025-07-09 09:21:58 +0200
committerTrygve Laugstøl <trygvis@inamo.no>2025-07-09 09:21:58 +0200
commit258fa90f1921acdfe3c472e725a035d58b249f3b (patch)
tree712996b7f8556b9f82fbfc7ebb3531d8ea4f57ab
parent391541d62ac638b258e6244a237b72a15db88e24 (diff)
downloadconstance-ring-258fa90f1921acdfe3c472e725a035d58b249f3b.tar.gz
constance-ring-258fa90f1921acdfe3c472e725a035d58b249f3b.tar.bz2
constance-ring-258fa90f1921acdfe3c472e725a035d58b249f3b.tar.xz
constance-ring-258fa90f1921acdfe3c472e725a035d58b249f3b.zip
Spellchecking A
-rw-r--r--.gitignore1
-rw-r--r--Makefile28
-rw-r--r--README.md17
-rw-r--r--constance-ring.epubbin250443 -> 250443 bytes
-rw-r--r--dict45
-rw-r--r--spellcheck-words38
6 files changed, 78 insertions, 51 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..1944fd6
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+*.tmp
diff --git a/Makefile b/Makefile
index cc7b753..7c903d7 100644
--- a/Makefile
+++ b/Makefile
@@ -4,6 +4,8 @@ INPUT=2025-07-01T2031_NB_generated.pdf
# ocrmypdf -l nor --force-ocr --sidecar $(pwd)/sidecar.txt $(pwd)/2025-07-01T2031_NB_generated.printed.pdf $(pwd)/2025-07-01T2031_NB_generated.ocr.pdf
+MAKEFLAGS += -r
+
all: constance-ring.epub
JPGs:=$(wildcard image/*.jpg)
@@ -49,18 +51,32 @@ stage-4.txt: stage-3.txt Makefile
stage-5.txt: stage-4.txt convert2.awk
@echo '#' $@
awk -f convert2.awk $< > $@.tmp
- mv $@.tmp $@
+ @mv $@.tmp $@
-stage-6.md spellcheck-words: stage-5.txt Makefile
+stage-6.md: stage-5.txt Makefile
@echo '#' $@
@pandoc --from markdown -o $@.tmp.md $<
@mv $@.tmp.md $@
- @echo spellcheck
- @hunspell -p dict -d nb_NO -l < $@ | sort | uniq -c | sort -n > spellcheck-words
-constance-ring.epub: header.md stage-6.md
+# This actually updates dict by sorting and removing leading numbers/spaces.
+dict.tmp: dict
+ cat dict |\
+ sed "s,^[0-9 ]*,," |\
+ sort |\
+ grep -v "^$$" |\
+ uniq > dict.tmp && cp dict.tmp dict
+
+spellcheck: stage-6.md
+ hunspell -p dict -d nb_NO stage-6.md dict.tmp
+
+spellcheck-words: stage-6.md dict.tmp
+ @echo hunspell
+ hunspell -p dict -d nb_NO -l < $< | sort | uniq -c | sort -n > $@.tmp
+ @mv $@.tmp spellcheck-words
+
+constance-ring.epub: header.md stage-6.md spellcheck-words
@echo '#' $@
- @pandoc --toc -o $@ $^
+ @pandoc --toc -o $@ $(filter %.md,$^)
.PHONY: clean
clean:
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..19b4f86
--- /dev/null
+++ b/README.md
@@ -0,0 +1,17 @@
+# Proofreading
+
+Run `make spellcheck-words` to run hunspell over the document. It will read
+[`dict`](./dict) as the personal dictionary and output any misspelled words into
+[`spellcheck-words`](./spellcheck-words).
+
+The book is written in a much older Norwegian than what the dictionary contains
+so there are many misidentifications.
+
+Process:
+
+For each word in `spellcheck-words`:
+
+* Check if the word is in the original PDF. If so, add it to the dictionary.
+ This happens when the word is just too old.
+* Run a manual spellcheck with `make spellcheck`. This is more tedious as
+ it runs through the entire document from the beginning.
diff --git a/constance-ring.epub b/constance-ring.epub
index bed5ab6..19e68e8 100644
--- a/constance-ring.epub
+++ b/constance-ring.epub
Binary files differ
diff --git a/dict b/dict
index 85f0779..1b64445 100644
--- a/dict
+++ b/dict
@@ -16,6 +16,9 @@ appelsindel
armstake
armstykker
askebegere
+atlaskessløifer
+avskjedsblikket
+avvennet
buffeten
bøide
Conny
@@ -24,15 +27,49 @@ Constances
dig
dreves
dørgrepet
+efterfulgtes
+eftermiddagste
+efterslett
+eftertraktelsesverdig
+eketresmøbler
+ekteskapsbåndet
+ekteskapslenke
+ekteskapsår
+enepike
+engagerer
+engagert
+enitréen
+enkesorg
entréen
+ermekniplinger
+excellence
+facit
Fallesen
fashionable
+feberaktig
+feiltrin
fintfarvede
Fiskerhans
fløielsbløtt
+fløielsmodest
+fløielspolstrede
+fløielspolstret
+fløielspute
+fløielspynt
+fløielsstoppede
forat
+forblev
+forgangenhetens
+forklædde
+forklæsnippen
+forlovelseskort
+formuesomstendigheter
fornam
+fornuftsinnvendinger
fornøielse
+fornøielser
+fornøielses
+foroverbøiet
fortrin
fortrinet
fortøininger
@@ -40,25 +77,23 @@ fripostighet
gjennem
gjennemlevde
halvhøit
-halvhøit
Huhn
hvad
høider
høire
høirøde
høist
-høist
høit
igjennem
imellem
inte
+isfornøielser
klædde
Kristianiafjorden
lebene
leber
livsmodige
lommetørklæ
-lommetørklæ
lommetørklæet
Lorck
Lorcks
@@ -81,7 +116,6 @@ opblussen
Opefter
opfatning
opførsel
-opførsel
oplagt
opmerksomt
opofrelse
@@ -110,10 +144,8 @@ tendte
tilføiet
treklædde
tøi
-tøi
tøiet
uavladelig
-uavladelig
underlebe
undseelse
undtagen
@@ -123,7 +155,6 @@ vedblev
vilde
vinterstivnet
øie
-øie
øieblikk
øieblikket
øieblikks
diff --git a/spellcheck-words b/spellcheck-words
index a05a559..10c6490 100644
--- a/spellcheck-words
+++ b/spellcheck-words
@@ -9,11 +9,8 @@
1 assuransepolise
1 ater
1 atlaskes
- 1 atlaskessløifer
1 auf
1 avs
- 1 avskjedsblikket
- 1 avvennet
1 avver
1 B
1 barndomsveninde
@@ -100,30 +97,12 @@
1 ee
1 ef
1 efier
- 1 efterfulgtes
- 1 eftermiddagste
- 1 efterslett
- 1 eftertraktelsesverdig
- 1 eketresmøbler
- 1 Ekteskapsbåndet
- 1 ekteskapslenke
- 1 ekteskapsår
1 elskovssang
1 em
1 endå
- 1 enepike
1 enfin
- 1 engagerer
- 1 engagert
- 1 enitréen
- 1 enkesorg
- 1 ermekniplinger
1 es
- 1 excellence
1 F
- 1 facit
- 1 feberaktig
- 1 feiltrin
1 ferdigfaldede
1 Feyn
1 Ffter
@@ -133,26 +112,10 @@
1 FJ
1 floskelvesen
1 flyitet
- 1 fløielsmodest
- 1 fløielspolstrede
- 1 fløielspolstret
- 1 fløielspute
- 1 fløielspynt
- 1 fløielsstoppede
- 1 forblev
1 forbry
1 foresait
- 1 forgangenhetens
- 1 forklædde
- 1 forklæsnippen
1 forlovelses
- 1 forlovelseskort
1 formidda
- 1 formuesomstendigheter
- 1 fornuftsinnvendinger
- 1 fornøielser
- 1 fornøielses
- 1 foroverbøiet
1 forskjønnelseskunster
1 forskruet
1 forsøvnig
@@ -280,7 +243,6 @@
1 instinktmessig
1 instruk
1 io
- 1 isfornøielser
1 iskold
1 iskolde
1 iskoldt