diff options
| author | Trygve Laugstøl <trygvis@inamo.no> | 2025-07-11 08:00:19 +0200 |
|---|---|---|
| committer | Trygve Laugstøl <trygvis@inamo.no> | 2025-07-11 08:00:19 +0200 |
| commit | 128575f83ebab60e7ba84598676c2c52497149c2 (patch) | |
| tree | 8905ed64bd5bc96de0796f36fcd08662dabdfe4a /stage-2.awk | |
| parent | 6a37ba289436cc09270cf63810888acb7df53002 (diff) | |
| download | constance-ring-128575f83ebab60e7ba84598676c2c52497149c2.tar.gz constance-ring-128575f83ebab60e7ba84598676c2c52497149c2.tar.bz2 constance-ring-128575f83ebab60e7ba84598676c2c52497149c2.tar.xz constance-ring-128575f83ebab60e7ba84598676c2c52497149c2.zip | |
Much improved page joining.
Fixes many split paragraphs.
Diffstat (limited to 'stage-2.awk')
| -rw-r--r-- | stage-2.awk | 54 |
1 files changed, 48 insertions, 6 deletions
```diff
diff --git a/stage-2.awk b/stage-2.awk
index 26f045c..2e85ece 100644
--- a/stage-2.awk
+++ b/stage-2.awk
@@ -1,10 +1,52 @@
-# Footer
-/^ *[0-9]* *Constance *Ring *[0-9]* *$/ {
-	do {
-		getline
-	} while(false); # ($0=="");
+# This removes page footers, multiple blank lines and combines paragraphs that
+# are split across pages.
+#
+# A split paragraph is one where there are blank lines or a page footer, *and*
+# it starts with a lower case letter.
+
+BEGIN {
+	getline
+	prev = $0
+	extra_nl = 0
 }
 
 {
-	print
+	if (match($0, /^ *[0-9]* *Constance *Ring *[0-9]* *$/)) {
+		if (prev != "") {
+			print prev
+		} else {
+			extra_nl = 1
+		}
+		getline
+		$0 = ""
+		prev = ""
+	}
+
+	if ($0 == "" && prev == "") {
+	} else {
+		if (extra_nl) {
+			extra_nl = 0
+
+			m=$0
+#			printf "m='%s'\n", m
+			u = match(m, /^[^A-Za-z]*[A-Z].*/)
+#			printf "u=%d, l=%d", u, l
+#			printf "u=%d, RSTART=%d, RLENGTH=%d, ", u, RSTART, RLENGTH
+			l = match(m, /^[^A-Za-z]*[a-z]/)
+#			printf "l=%d, RSTART=%d, RLENGTH=%d", l, RSTART, RLENGTH
+			if (u > l) {
+				printf "\n", prev
+			}
+			printf "%s", prev
+#			print
+		} else {
+			print prev
+		}
+	}
+	prev=$0
+
+}
+
+END {
+	print $prev
 }
```
