about summary refs log tree commit diff
path: root/stage-2.awk
diff options
context:
space:
mode:
Diffstat (limited to 'stage-2.awk')
-rw-r--r-- stage-2.awk 54
1 files changed, 48 insertions, 6 deletions
diff --git a/stage-2.awk b/stage-2.awk
index 26f045c..2e85ece 100644
--- a/stage-2.awk
+++ b/stage-2.awk
@@ -1,10 +1,52 @@
-# Footer
-/^ *[0-9]* *Constance *Ring *[0-9]* *$/ {
- do {
- getline
- } while(false); # ($0=="");
+# This removes page footers, collapses multiple blank lines, and joins
+# paragraphs that are split across pages.
+#
+# A paragraph is treated as split when blank lines or a page footer interrupt
+# it *and* the text that follows starts with a lower case letter.
+
+BEGIN {  # one-time setup: prime the one-line look-behind buffer before the main rule runs
+ getline  # consume the first input record here, so the main rule starts at the second record; the return value is not checked
+ prev = $0  # prev buffers the previous line; it is only printed after the following line has been examined
+ extra_nl = 0  # flag set by the main rule when a page footer directly follows a blank line
}
{
- print
+ if (match($0, /^ *[0-9]* *Constance *Ring *[0-9]* *$/)) {  # page footer: optional number, "Constance Ring", optional number
+ if (prev != "") {
+ print prev  # flush the buffered text line that precedes the footer
+ } else {
+ extra_nl = 1  # footer came right after a blank line; the paragraph-break decision is deferred to the next non-blank line
+ }
+ getline  # reads the next record into $0
+ $0 = ""  # NOTE(review): this overwrites the record just read, so the line after a footer is discarded - presumably it is always blank; confirm
+ prev = ""  # from here on the footer is handled like a blank line
+ }
+
+ if ($0 == "" && prev == "") {  # consecutive blank lines: print nothing, so runs of blanks collapse
+ } else {
+ if (extra_nl) {  # first line to reach this branch after a blank-line + footer break
+ extra_nl = 0
+
+ m=$0
+# u is 1 when the first ASCII letter on the line is upper case, otherwise 0.
+ u = match(m, /^[^A-Za-z]*[A-Z].*/)
+# Both patterns are anchored at ^, so match() returns 1 on success and 0 on
+# failure. l is 1 when the first ASCII letter on the line is lower case.
+ l = match(m, /^[^A-Za-z]*[a-z]/)
+# An upper-case start gets a blank line before it; anything else gets none.
+ if (u > l) {
+ printf "\n", prev  # the format has no conversion, so prev is an unused extra argument
+ }
+ printf "%s", prev  # no trailing newline; NOTE(review): prev appears to always be "" on this path - confirm
+# The current line itself is printed later, once it has become prev.
+ } else {
+ print prev  # normal case: output the previous line now that its successor is known
+ }
+ }
+ prev=$0  # buffer the current line (or the "" that replaced a footer)
+
+}
+
+END {  # flush the last buffered line, which the main rule never gets to print
+ print prev  # the variable itself; "$prev" would be a field reference using prev's numeric value
}