diff options
| author | Trygve Laugstøl <trygvis@inamo.no> | 2025-07-02 21:14:40 +0200 |
|---|---|---|
| committer | Trygve Laugstøl <trygvis@inamo.no> | 2025-07-02 21:29:03 +0200 |
| commit | 56f785f42845b5e58bbf89201981c564abdc0b63 (patch) | |
| tree | 66f0274a26e0c237e9153f71a3c171dde3b98157 /Makefile | |
| parent | 7dd19ae1227d928795fe965bdcbf686b4dfaba46 (diff) | |
| download | constance-ring-56f785f42845b5e58bbf89201981c564abdc0b63.tar.gz constance-ring-56f785f42845b5e58bbf89201981c564abdc0b63.tar.bz2 constance-ring-56f785f42845b5e58bbf89201981c564abdc0b63.tar.xz constance-ring-56f785f42845b5e58bbf89201981c564abdc0b63.zip | |
wip
Diffstat (limited to 'Makefile')
| -rw-r--r-- | Makefile | 52 |
1 files changed, 47 insertions, 5 deletions
Unified diff of Makefile for commit 56f785f42845b5e58bbf89201981c564abdc0b63
(parent 7dd19ae1227d928795fe965bdcbf686b4dfaba46): 47 insertions, 5 deletions.
Recipe lines carry a hard TAB directly after the leading diff marker.

--- a/Makefile
+++ b/Makefile
@@ -1,17 +1,58 @@
+SHELL=/bin/bash
+INPUT=2025-07-01T2031_NB_generated.pdf
+#INPUT=2025-07-01T2031_NB_generated.ocr.pdf
+
+# ocrmypdf -l nor --force-ocr --sidecar $(pwd)/sidecar.txt $(pwd)/2025-07-01T2031_NB_generated.printed.pdf $(pwd)/2025-07-01T2031_NB_generated.ocr.pdf
+
 all: constance-ring.epub
 
-stage-1.txt: 2025-07-01T2031_NB_generated.pdf
-	pdftotext -layout $< $@
+JPGs:=$(wildcard image/*.jpg)
+TXTs:=$(patsubst image/%.jpg,txt/%.txt,$(JPGs))
+
+page-index.txt: 2025-07-01T2031_NB_generated.pdf
+	@rm -f page-*.pdf
+	@mkdir -p image
+	pdfimages -print-filenames -j $< image/page > $@.tmp
+	@mv $@.tmp $@
+
+images: page-index.txt
+ocr: $(TXTs)
+
+.PRECIOUS: image/page-%.jpg
+image/page-%.jpg: page-index.txt
+	@true # nothing to do
+
+.PRECIOUS: txt/page-%.txt
+txt/page-%.txt:image/page-%.jpg
+	@mkdir -p $(dir $@)
+	tesseract -l nor $< - > $@.tmp
+	@mv $@.tmp $@
+
+stage-1.txt: $(TXTs)
+	cat txt/page-{014..328}.txt > $@
+	# pdftotext -layout $< $@
 
 stage-2.txt: stage-1.txt
 	@echo $@
-	tr '\f' '\n' < $< > $@
+	# tr '\f' '\n' < $< > $@
+	cp $< $@
 
 stage-3.txt: stage-2.txt convert.awk
 	@echo $@
-	awk -f convert.awk $< > $@
+	awk -f convert.awk $< > $@.tmp
+	@mv $@.tmp $@
+
+stage-4.txt: stage-3.txt Makefile
+	@echo $@
+	uniq $< > $@.tmp
+	@mv $@.tmp $@
+
+stage-5.md: stage-4.txt Makefile
+	@echo $@
+	pandoc --from markdown -o $@.tmp.md $<
+	@mv $@.tmp.md $@
 
-constance-ring.md: header.md stage-3.txt
+constance-ring.md: header.md stage-5.md
 	@echo $@
 	cat $^ > $@.tmp
 	mv $@.tmp $@
@@ -21,4 +62,5 @@ constance-ring.epub: constance-ring.md
 
 .PHONY: clean
 clean:
+	if [ -r page-index.txt ]; then cat page-index.txt | xargs rm; rm page-index.txt; fi
 	@rm -f stage-*.txt
