#!/bin/sh htmlsplit() { perl -e '$_ = join "",; tr/\n\r \t/ /s; s//>\n/g; s/\n ?\n/\n/g; s/^ ?\n//s; s/ $//s; print' } html2txt() { tr -d '' | htmlsplit | grep -v '^<' | perl -pe 's/^\s+//; s/"/"/g; s/&/\&/g; s/<//g; s/&#(\d+);/chr($1)/ge;' | tr -s ' ' | tr -s ' ' | perl -pe 's/ / /g;' } pair_lines() { perl -e ' $n = $ARGV[0] || 2; while(defined ($_=)) { chomp; push @q, $_; if (@q == $n) { print join "\t", @q; print "\n"; shift @q; } } ' } N="$1" URL="$2" if [ -n "$URL" ]; then wget "$URL" -O- else cat fi | htmlsplit | perl -ne '/^