diff options
Diffstat (limited to 'stage-2.awk')
| -rw-r--r-- | stage-2.awk | 54 |
1 files changed, 48 insertions, 6 deletions
diff --git a/stage-2.awk b/stage-2.awk
index 26f045c..2e85ece 100644
--- a/stage-2.awk
+++ b/stage-2.awk
@@ -1,10 +1,52 @@
-# Footer
-/^ *[0-9]* *Constance *Ring *[0-9]* *$/ {
-	do {
-		getline
-	} while(false); # ($0=="");
+# Remove page footers, collapse runs of blank lines, and rejoin paragraphs
+# that were split across a page break.  A paragraph counts as split when blank
+# lines or a page footer precede it *and* its first letter is lower case.
+#
+# Output lags the input by one line: `prev` holds the line not yet printed.
+
+BEGIN {
+	getline  # prime `prev` with the first input line
+	prev = $0
+	extra_nl = 0  # set when a footer is seen while `prev` is empty
 }
 
 {
-	print
+	# Footer line: the text "Constance Ring" with optional numbers around it.
+	if (match($0, /^ *[0-9]* *Constance *Ring *[0-9]* *$/)) {
+		if (prev != "") {
+			print prev
+		} else {
+			extra_nl = 1
+		}
+		getline  # the line after the footer is read and discarded
+		$0 = ""
+		prev = ""
+	}
+
+	if ($0 == "" && prev == "") {
+		# Consecutive blank lines: print nothing.
+	} else {
+		if (extra_nl) {
+			extra_nl = 0
+
+			# u is 1 when the first letter of the line is upper
+			# case, l is 1 when it is lower case; otherwise 0.
+			u = match($0, /^[^A-Za-z]*[A-Z].*/)
+			l = match($0, /^[^A-Za-z]*[a-z]/)
+			if (u > l) {
+				# Upper case start: keep the paragraph break.
+				printf "\n"
+			}
+			printf "%s", prev
+		} else {
+			print prev
+		}
+	}
+	prev = $0
+
+}
+
+END {
+	# Flush the held line (prev itself, not the field numbered by prev).
+	print prev
 }
