# Pipeline: scanned PDF -> page images -> OCR text -> cleaned-up text
# -> Markdown -> EPUB.
#
#   make            build constance-ring.epub and the spellcheck report
#   make images     only extract the page images from $(INPUT)
#   make ocr        OCR every page image currently present in image/
#   make V=1 ...    echo the commands as they run (quiet by default)

# Some recipes and the $(shell ...) call below are run through bash.
SHELL := /bin/bash

# Remove a target whose recipe failed, so a truncated file is never
# mistaken for an up-to-date one.
.DELETE_ON_ERROR:

# Source scan.  The alternative input below is a pre-OCRed PDF.
INPUT := 2025-07-01T2031_NB_generated.pdf
#INPUT := 2025-07-01T2031_NB_generated.ocr.pdf

# For reference, the command used to produce the .ocr.pdf variant:
# ocrmypdf -l nor --force-ocr --sidecar $(pwd)/sidecar.txt $(pwd)/2025-07-01T2031_NB_generated.printed.pdf $(pwd)/2025-07-01T2031_NB_generated.ocr.pdf

# First and last page image (pdfimages numbering) that make up the body
# text concatenated into stage-1.txt.
FIRST_PAGE ?= 14
LAST_PAGE ?= 328

.PHONY: all images ocr clean

all: constance-ring.epub spellcheck-words

# Page images that already exist on disk, and their OCR outputs.
# Evaluated when the Makefile is parsed, so both are empty until the
# images have been extracted once.
JPGs := $(wildcard image/*.jpg)
TXTs := $(patsubst image/%.jpg,txt/%.txt,$(JPGs))

# OCR outputs needed for the book body, named explicitly (zero-padded to
# three digits) so that they can be built from a clean tree, before any
# image exists for $(wildcard) to find.
PAGE_TXTs := $(shell seq -f 'txt/page-%03g.txt' $(FIRST_PAGE) $(LAST_PAGE))

# Extract the embedded images as image/page-NNN.jpg; the file names
# printed by pdfimages are recorded in page-index.txt (used by `clean`).
# NOTE(review): the purpose of removing page-*.pdf here is not visible in
# this file -- kept as in the original; confirm it is still needed.
page-index.txt: $(INPUT)
	@rm -f page-*.pdf
	@mkdir -p image
	pdfimages -print-filenames -j $< image/page > $@.tmp
	@mv $@.tmp $@

images: page-index.txt

ocr: $(TXTs)

# The images are a by-product of the page-index.txt recipe; this rule
# only tells make where they come from.  .PRECIOUS keeps make from
# deleting them as intermediate files of the pattern-rule chain.
.PRECIOUS: image/page-%.jpg
image/page-%.jpg: page-index.txt
	@true # nothing to do

# OCR a single page image with tesseract (language pack "nor").
.PRECIOUS: txt/page-%.txt
txt/page-%.txt: image/page-%.jpg
	@mkdir -p $(@D)
	tesseract -l nor $< - > $@.tmp
	@mv $@.tmp $@

# Concatenate the OCR text of pages FIRST_PAGE..LAST_PAGE, in page order.
stage-1.txt: $(PAGE_TXTs)
	$(if $(strip $^),,$(error no pages selected: check FIRST_PAGE/LAST_PAGE))
	cat $^ > $@.tmp
	@mv $@.tmp $@

# Currently a plain copy.  A form-feed to newline translation used to
# live here:  tr '\f' '\n' < $< > $@
stage-2.txt: stage-1.txt
	@echo '#' $@
	@cp $< $@

# First awk clean-up pass.
stage-3.txt: stage-2.txt convert.awk
	@echo '#' $@
	@awk -f convert.awk $< > $@.tmp
	@mv $@.tmp $@

# Collapse runs of identical adjacent lines.  Depends on the Makefile so
# that editing the pipeline forces a rebuild from this stage on.
stage-4.txt: stage-3.txt Makefile
	@echo '#' $@
	@uniq $< > $@.tmp
	@mv $@.tmp $@

# Second awk clean-up pass.
stage-5.txt: stage-4.txt convert2.awk
	@echo '#' $@
	awk -f convert2.awk $< > $@.tmp
	mv $@.tmp $@

# Normalise the text through pandoc (Markdown in, Markdown out; the
# output format is taken from the .md suffix of the temporary file).
stage-6.md: stage-5.txt Makefile
	@echo '#' $@
	@pandoc --from markdown -o $@.tmp.md $<
	@mv $@.tmp.md $@

# Words hunspell does not recognise, with occurrence counts, rarest
# first.  This is a separate rule from stage-6.md: with both names on one
# rule header, building spellcheck-words ran the shared recipe with
# $@ = spellcheck-words and overwrote the file it was reading.
spellcheck-words: stage-6.md
	@echo spellcheck
	@hunspell -p dict -d nb_NO -l < $< | sort | uniq -c | sort -n > $@.tmp
	@mv $@.tmp $@

# Prepend the hand-written header to the generated body.
constance-ring.md: header.md stage-6.md
	@echo '#' $@
	@cat $^ > $@.tmp
	@mv $@.tmp $@

constance-ring.epub: constance-ring.md
	@echo '#' $@
	@pandoc --toc -o $@ $<

# Remove the extracted images (as listed in page-index.txt) and the
# stage-*.txt files.  The OCR output in txt/ is left in place.
# NOTE(review): stage-6.md, spellcheck-words, constance-ring.md and the
# .epub are not removed either -- same scope as the original; confirm
# that this is intended.
clean:
	if [ -r page-index.txt ]; then xargs rm -f < page-index.txt; rm -f page-index.txt; fi
	@rm -f stage-*.txt

# Quiet by default: with V unset this line reads ".SILENT:".  With
# `make V=1` it becomes a harmless target named "1.SILENT" and the
# commands are echoed.  Keep it below `all` so that it can never become
# the default goal.
$(V).SILENT: