#!/bin/bash
#
# Converts an HTML stream so that all markup ('<'...'>') occurs on a line
# by itself with no embedded newlines. All other characters are simply
# echoed. This requires forcing bash to read one character at a time.
#
# Usage: reads HTML from stdin, writes the converted text to stdout.

#######################################
# Copy stdin to stdout one character at a time, isolating each tag on
# its own line.
# Arguments: none
# Outputs:   converted text on stdout
# Returns:   0 once end of input is reached
#######################################
split_markup() {
  local in_tag=false
  local ch

  # IFS= keeps blanks from being stripped; -r keeps backslashes literal
  # (without it, read treats '\' as an escape and drops it); -n 1 returns
  # after a single character. A newline is the read delimiter, so it shows
  # up as an empty string with a successful status.
  while IFS= read -r -n 1 ch; do
    case "$ch" in
      '')
        # Newline: inside '<...>' emit just a space so the tag stays on
        # one line; otherwise emit the newline unchanged.
        if [[ "$in_tag" == true ]]; then
          printf ' '
        else
          printf '\n'
        fi
        ;;
      '<')
        # Start of markup: note it, break the line, then emit '<'.
        in_tag=true
        printf '\n%s' "$ch"
        ;;
      '>')
        # End of markup: note it, emit '>' and finish the line.
        in_tag=false
        printf '%s\n' "$ch"
        ;;
      *)
        printf '%s' "$ch"
        ;;
    esac
  done
}

split_markup
Sunday, April 25, 2010
Character at a time in bash
I was goofing around with fixing HTML generated from W4W. I could have used Perl or a hundred other things, but I wanted to see if I could do it in bash, for no particular reason. Here it is.