# OCR pipeline: scanned PDF -> page images -> per-page text -> awk clean-up
# stages -> markdown (stage-5.md).
#
# Recipes are run by bash because stage-1.txt uses brace expansion.
SHELL := /bin/bash

# Source PDF the page images are extracted from.
INPUT := 2025-07-01T2031_NB_generated.pdf
#INPUT=2025-07-01T2031_NB_generated.ocr.pdf

# ocrmypdf -l nor --force-ocr --sidecar $(pwd)/sidecar.txt $(pwd)/2025-07-01T2031_NB_generated.printed.pdf $(pwd)/2025-07-01T2031_NB_generated.ocr.pdf

# First and last page image (zero-padded, as in the image/page-NNN.jpg names)
# concatenated into stage-1.txt.
# NOTE(review): presumably the pages that hold the body text - confirm.
FIRST_PAGE := 014
LAST_PAGE := 328

# No built-in implicit rules; every rule used is spelled out below.
MAKEFLAGS += -r

# Remove a target whose recipe failed so it cannot pass for up to date.
.DELETE_ON_ERROR:

.PHONY: all images ocr

all: constance-ring.epub

# Both lists are computed once, when the makefile is parsed.  With an empty
# image/ directory TXTs is therefore empty, and the images have to be
# extracted (make images) before a later run can see them.
JPGs := $(wildcard image/*.jpg)
TXTs := $(patsubst image/%.jpg,txt/%.txt,$(JPGs))

# Extract the page images into image/ and record the file names that
# pdfimages prints; the list is reused by clean-ocr to delete them again.
# NOTE(review): the first recipe line removes page-*.pdf from the current
# directory although nothing here creates such files - confirm it is needed.
page-index.txt: $(INPUT)
	@rm -f page-*.pdf
	@mkdir -p image
	pdfimages -print-filenames -j $< image/page > $@.tmp
	@mv $@.tmp $@

images: page-index.txt

ocr: $(TXTs)

# The images are a side effect of the page-index.txt recipe; this no-op rule
# only tells make where they come from.
.PRECIOUS: image/page-%.jpg
image/page-%.jpg: page-index.txt
	@true # nothing to do

# OCR one page image (Norwegian language data) into its text file.
.PRECIOUS: txt/page-%.txt
txt/page-%.txt: image/page-%.jpg
	@mkdir -p $(@D)
	tesseract -l nor $< - > $@.tmp
	@mv $@.tmp $@

# Concatenate the selected page range.  Written via a temporary file, like
# the other stages, so a missing page cannot leave a truncated stage-1.txt.
stage-1.txt: $(TXTs)
	cat txt/page-{$(FIRST_PAGE)..$(LAST_PAGE)}.txt > $@.tmp
	@mv $@.tmp $@

# Shared recipe of the awk stages: run the stage's .awk script over the first
# prerequisite (the previous stage's output) and move the result into place.
define awk-stage
@echo '#' $@
awk -f $(filter %.awk,$^) $< > $@.tmp
@mv $@.tmp $@
endef

stage-2.txt: stage-1.txt stage-2.awk
	$(awk-stage)

stage-3.txt: stage-2.txt stage-3.awk
	$(awk-stage)

stage-4.txt: stage-3.txt stage-4.awk
	$(awk-stage)

# Normalise the cleaned text through pandoc (markdown in, markdown out) using
# the local pandoc-data directory.  The temporary file keeps the .md suffix
# so pandoc still selects markdown as the output format.
stage-5.md: stage-4.txt $(wildcard pandoc-data/*)
	@echo '#' $@
	@pandoc --data-dir=pandoc-data --from markdown -o $@.tmp.md $<
	@mv $@.tmp.md $@

# This actually updates dict by sorting and removing leading numbers/spaces.
# dict.tmp is the stamp for the normalised personal dictionary: the recipe
# strips leading digits/spaces, sorts, drops empty lines and duplicates, and
# copies the result back over dict.  Because dict is rewritten by the recipe,
# the stamp is touched last; otherwise dict would always be newer than
# dict.tmp and everything depending on it would rebuild on every run.
dict.tmp: dict
	sed "s,^[0-9 ]*,," dict |\
	sort |\
	grep -v "^$$" |\
	uniq > $@ && cp $@ dict
	touch $@

.PHONY: spellcheck clean clean-ocr

# Run hunspell on the markdown with the Norwegian and the book-specific
# dictionaries, using dict as the personal word list.
spellcheck: stage-5.md
	hunspell -p $$(pwd)/dict -d nb_NO,constance-ring $<

# Words hunspell does not accept, one per line with their occurrence count,
# least frequent first.
spellcheck-words: stage-5.md dict.tmp constance-ring.dic
	@echo hunspell
	hunspell -p $$(pwd)/dict -d nb_NO,constance-ring -l < $< | sort | uniq -c | sort -n > $@.tmp
	@mv $@.tmp $@

# Inputs of the finished book.  Only the .md files are passed to pandoc;
# cover.jpg and spellcheck-words are listed purely as prerequisites.
# NOTE(review): cover.jpg is presumably referenced from header.md - confirm.
book_inputs := header.md cover.jpg frontmatter.md stage-5.md spellcheck-words

constance-ring.epub constance-ring.pdf: $(book_inputs)
	@echo '#' $@
	@pandoc --toc -o $@ $(filter %.md,$^)

# Remove the generated text stages, stamps and books; OCR output is kept.
clean:
	@rm -f stage-*.txt stage-5.md spellcheck-words dict.tmp constance-ring.epub constance-ring.pdf

# Additionally delete every file listed in page-index.txt (the extracted page
# images) and the index itself.  rm -f keeps an empty index from failing.
clean-ocr: clean
	if [ -r page-index.txt ]; then xargs rm -f < page-index.txt; rm -f page-index.txt; fi

# With V unset this line reads ".SILENT:" and no recipe is echoed; with
# "make V=1" it becomes an unrelated target and the commands are shown.
$(V).SILENT: