# Build constance-ring.epub from a scanned PDF:
#   PDF -> embedded page images (pdfimages) -> per-image OCR text (tesseract)
#       -> stage-1..4 text clean-up -> stage-5 Markdown -> EPUB (pandoc).
#
# Requires GNU Make and the external tools pdfimages, tesseract, awk, pandoc.

SHELL := /bin/bash

# Remove a target whose recipe failed so a partial file never looks up to date.
.DELETE_ON_ERROR:

# Source PDF the images are extracted from.
INPUT := 2025-07-01T2031_NB_generated.pdf
#INPUT := 2025-07-01T2031_NB_generated.ocr.pdf

# Alternative OCR route kept for reference (shell command, not used by any rule):
# ocrmypdf -l nor --force-ocr --sidecar $(pwd)/sidecar.txt $(pwd)/2025-07-01T2031_NB_generated.printed.pdf $(pwd)/2025-07-01T2031_NB_generated.ocr.pdf

# First and last image index (as numbered in image/page-NNN.jpg) that make up
# the text concatenated into stage-1.txt.
FIRST_PAGE := 14
LAST_PAGE := 328

.PHONY: all images ocr clean

all: constance-ring.epub

# Images that already exist on disk at parse time, and their OCR outputs.
# NOTE: this list is empty until the images have been extracted, so `make ocr`
# only sees images produced by an earlier `make images` run.
JPGs := $(wildcard image/*.jpg)
TXTs := $(patsubst image/%.jpg,txt/%.txt,$(JPGs))

# OCR outputs for the FIRST_PAGE..LAST_PAGE range, zero-padded to three digits.
# Listed explicitly (not via wildcard) so the dependency chain also works on a
# clean checkout where image/ does not exist yet.
BOOK_PAGES := $(shell seq -f '%03g' $(FIRST_PAGE) $(LAST_PAGE))
BOOK_TXTs := $(patsubst %,txt/page-%.txt,$(BOOK_PAGES))

# Extract the images embedded in $(INPUT) into image/ and record the file
# names pdfimages prints (-print-filenames) in page-index.txt; `clean` uses
# that list to remove them again.
# NOTE(review): nothing in this file creates page-*.pdf; the rm is presumably a
# leftover from an earlier workflow - confirm before removing it.
page-index.txt: $(INPUT)
	@rm -f page-*.pdf
	@mkdir -p image
	pdfimages -print-filenames -j $< image/page > $@.tmp
	@mv $@.tmp $@

images: page-index.txt

ocr: $(TXTs)

# The images are a side effect of building page-index.txt; this rule only
# tells Make where they come from, so its recipe does nothing.
.PRECIOUS: image/page-%.jpg
image/page-%.jpg: page-index.txt
	@true

# OCR one image (Norwegian language data) to plain text; "-" makes tesseract
# write to stdout so the result can be moved into place atomically.
.PRECIOUS: txt/page-%.txt
txt/page-%.txt: image/page-%.jpg
	@mkdir -p $(@D)
	tesseract -l nor $< - > $@.tmp
	@mv $@.tmp $@

# Concatenate the OCR text of the book pages in ascending order.
# Alternative kept for reference: pdftotext -layout <pdf> stage-1.txt
stage-1.txt: $(BOOK_TXTs)
	cat $^ > $@.tmp
	@mv $@.tmp $@

# Currently a plain copy of stage 1.
# Alternative kept for reference: tr '\f' '\n' < stage-1.txt > stage-2.txt
stage-2.txt: stage-1.txt
	@echo $@
	cp $< $@.tmp
	@mv $@.tmp $@

# Apply the clean-up rules in convert.awk.
stage-3.txt: stage-2.txt convert.awk
	@echo $@
	awk -f convert.awk $< > $@.tmp
	@mv $@.tmp $@

# Collapse runs of identical adjacent lines.
stage-4.txt: stage-3.txt Makefile
	@echo $@
	uniq $< > $@.tmp
	@mv $@.tmp $@

# Normalise the text through pandoc; the temporary name ends in .md so pandoc
# still picks Markdown as the output format from the extension.
stage-5.md: stage-4.txt Makefile
	@echo $@
	pandoc --from markdown -o $@.tmp.md $<
	@mv $@.tmp.md $@

# Prepend the hand-written header to the generated body.
constance-ring.md: header.md stage-5.md
	@echo $@
	cat $^ > $@.tmp
	mv $@.tmp $@

constance-ring.epub: constance-ring.md
	pandoc -o $@ $<

# Remove extracted images (as listed in page-index.txt), the intermediate
# stage files and the generated book. txt/ is not removed here, so existing
# OCR output is kept.
clean:
	if [ -r page-index.txt ]; then xargs rm -f < page-index.txt && rm -f page-index.txt; fi
	@rm -f page-index.txt.tmp
	@rm -f stage-*.txt stage-*.txt.tmp stage-5.md stage-5.md.tmp.md
	@rm -f constance-ring.md constance-ring.md.tmp constance-ring.epub