Linux, Unix, /etc

Danger Will Robinson! You are now entering a condescending Unix user zone!
Sponsored links (requires javascript):

Scripts: Text Utilities

double: double-space the lines in a text

 
#!/usr/bin/sed -f
# double: double-space the lines in a text.
# G appends a newline plus the contents of the hold space to the pattern
# space. Nothing in this script ever writes to the hold space, so it stays
# empty and every input line is printed followed by one blank line.
G


number: add line numbers to a text

 
#!/bin/sh
# number: prefix every line of standard input with its sequence number.
# A leading number (digits followed by blanks or tabs) is stripped first, so
# an already-numbered list is renumbered instead of being numbered twice.
awk '
{
    sub(/^[0-9]+[ \t]+/, "")
    number++
    print number " " $0
}
'


sedman: convert man pages to plain text

 
#!/usr/bin/sed -f
# sedman: convert formatted man page output to plain text
#
# The control characters in the first two commands are spelled with GNU sed
# \xHH escapes. Without them the commands read "s/.//g" and "s/9//g", which
# delete every character (and every digit 9) instead of the formatting codes.

# drop overstrike pairs: any character followed by a backspace
s/.\x08//g
# drop ESC 9 sequences
# NOTE(review): assumed to be the nroff half-line motion code -- confirm
# against the original script
s/\x1b9//g
# strip leading blanks and tabs
s/^[ \t]*//
# turn each remaining tab into a single space
s/\t/ /g
# squeeze runs of two or more spaces down to one
s/   */ /g
# put two spaces after a full stop
s/\. /.  /g


prpages: how many pages will my printout take?

Taken from Kernighan & Pike, The Unix Programming Environment.

 
#!/bin/sh
# prpages: how many pages will my printout take?
# usage: prpages [file ...]      (reads standard input when no file is given)
#
# Counts 56 lines to a page. The line count of each file is rounded up to
# whole pages separately, and the per-file page counts are then summed.
wc -l -- "$@" |
awk '
    # wc adds a " total" summary line when given several files; skip it so
    # that no lines are counted twice
    !/ total$/ { pages += int(($1 + 55) / 56) }
    END        { print pages + 0 }   # + 0: print 0 rather than an empty line
'


truncate: truncate lines at the nth column

#!/bin/sh
# truncate: truncate lines at column 80
# usage: truncate [file ...]     (reads standard input when no file is given)

# a literal tab, built with printf so the script does not depend on an
# editor preserving tab characters
tab=$(printf '\t')

# Each tab is replaced by exactly eight spaces (it is not expanded to the
# next tab stop) before the line width is measured.
sed "s/$tab/        /g" -- "$@" |
awk -v width=80 '
    # substr() returns the whole line when it is shorter than the limit
    { print substr($0, 1, width) }
'


replace: replace str1 with str2

 
#!/bin/sh
# replace: replace str1 in files with str2, in place
# usage: replace str1 str2 file...
#
# str1 is used as a sed regular expression and str2 as a sed replacement,
# with % as the s-command delimiter, so neither may contain an unescaped %.
# Relies on an external "overwrite" command (not defined here) found on PATH;
# it is invoked as "overwrite file command..." and presumably writes the
# command's output back over the file -- verify against its implementation.

PATH=/bin:/usr/bin:/usr/local/bin

case $# in
    0|1|2) echo 'Usage: replace str1 str2 files' 1>&2; exit 1 ;;
esac

left="$1"; right="$2"; shift; shift

for file
do
    # quoted so that file names containing blanks or glob characters survive
    overwrite "$file" sed "s%$left%$right%g" "$file"
done


freq: count word frequencies in a text

 
#!/bin/sh
# freq: count word frequencies in a text
# usage: freq [-N] [file ...]    (reads standard input when no file is given)
#   -N   show only the N most frequent words (default 10)
# Output is sorted by ascending count, so the most frequent word comes last.

count=10
case $1 in
    -[1-9]*) count=${1#-}; shift ;;
esac

cat -- "$@" |
tr -sc A-Za-z '\012' |     # one word per line; any non-letter is a separator
sort |
uniq -c |
sort -n |
tail -n "$count"


showmatch: mark string that matches pattern

 
#!/bin/sh
# showmatch: mark string that matches pattern
# usage: showmatch pattern [file ...]
#
# Prints each line that contains a match, followed by a line of carets (^)
# placed under the first match on that line.

case $# in
    0) echo 'Usage: showmatch pattern [files]' 1>&2; exit 1 ;;
esac

pattern="$1"; shift
awk '
{
    if (match($0, pattern)) {
        print
        carets = ""
        for (k = 1; k <= RLENGTH; k++)
            carets = carets "^"
        # pad with RSTART-1 blanks so the carets start under the match
        printf("%" (RSTART - 1) "s%s\n", "", carets)
    }
} ' pattern="$pattern" "$@"


centre.awk: centre lines in file(s) or stdin

 
#!/usr/bin/awk -f
# centre: centre lines in file(s) or stdin
# usage: centre [filenames]
#
# Each line is indented by half of the unused width of an 80-column line
# (rounded down). Lines of 79 columns or more are printed unchanged.
BEGIN {
    linelength = 80
}
{
    # computed afresh for every line, so the indentation of one line does not
    # carry over into the next
    indent = int((linelength - length($0)) / 2)
    if (indent > 0)
        printf "%" indent "s%s\n", "", $0
    else
        print
}


fixhtml: fix html generated by my m4 -> html macros

#!/bin/sed -f
# fix html generated by my m4 -> html macros
#
# The commands below run in order on every input line; the bracketed groups
# only run on lines matching their address.

# strip leading blank lines
# (between line 1 and the first line that starts with "<", delete every
# empty line)
1,/^</{
	/^$/d
}
# strip space between termination of markup tag and punctuation.
# (a ">" followed by one or more spaces and then a run of . , : ; " or )
# is rewritten without the spaces)
s/\(>\)  *\([.,:;")][.,:;")]*\)/\1\2/g
# strip multiple blank lines, leaving only one
# (on a line holding only spaces, N appends the next input line; if that
# line is blank as well, D removes the first of the two and restarts the
# cycle with what is left)
/^ *$/{
	N
	/^ *\n *$/D
}
# generate correct http urlencoding for special iso-8859-1 chars;
# and for tilde, per RFC 1738
# (note: they have already been converted to HTML-encoding by htmlize)
# (but do that conversion again as well, just in case)
# do it for links
# Each accented vowel gets two commands: the first rewrites the HTML entity
# when it sits between a double quote and a later ">, the second rewrites
# the literal character when it sits between two double quotes.
# NOTE(review): the .* groups are greedy, so it looks like each command
# rewrites only one occurrence per line -- confirm.
/<[Aa] [Hh][Rr][Ee][Ff]=/{
	s/\(".*\)&aacute;\(.*">\)/\1%E1\2/g
	s/\(".*\)á\(.*"\)/\1%E1\2/g
	s/\(".*\)&eacute;\(.*">\)/\1%E9\2/g
	s/\(".*\)é\(.*"\)/\1%E9\2/g
	s/\(".*\)&iacute;\(.*">\)/\1%ED\2/g
	s/\(".*\)í\(.*"\)/\1%ED\2/g
	s/\(".*\)&oacute;\(.*">\)/\1%F3\2/g
	s/\(".*\)ó\(.*"\)/\1%F3\2/g
	s/\(".*\)&uacute;\(.*">\)/\1%FA\2/g
	s/\(".*\)ú\(.*"\)/\1%FA\2/g
	s/\(".*\)&Aacute;\(.*">\)/\1%C1\2/g
	s/\(".*\)Á\(.*"\)/\1%C1\2/g
	s/\(".*\)&Eacute;\(.*">\)/\1%C9\2/g
	s/\(".*\)É\(.*"\)/\1%C9\2/g
	s/\(".*\)&Iacute;\(.*">\)/\1%CD\2/g
	s/\(".*\)Í\(.*"\)/\1%CD\2/g
	s/\(".*\)&Oacute;\(.*">\)/\1%D3\2/g
	s/\(".*\)Ó\(.*"\)/\1%D3\2/g
	s/\(".*\)&Uacute;\(.*">\)/\1%DA\2/g
	s/\(".*\)Ú\(.*"\)/\1%DA\2/g
	# every tilde on the matching line is encoded, wherever it appears
	s/~/%7E/g
}
# and for anchors
# (the same substitutions as above, applied to lines containing <a name=)
/<[Aa] [Nn][Aa][Mm][Ee]=/{
	s/\(".*\)&aacute;\(.*">\)/\1%E1\2/g
	s/\(".*\)á\(.*"\)/\1%E1\2/g
	s/\(".*\)&eacute;\(.*">\)/\1%E9\2/g
	s/\(".*\)é\(.*"\)/\1%E9\2/g
	s/\(".*\)&iacute;\(.*">\)/\1%ED\2/g
	s/\(".*\)í\(.*"\)/\1%ED\2/g
	s/\(".*\)&oacute;\(.*">\)/\1%F3\2/g
	s/\(".*\)ó\(.*"\)/\1%F3\2/g
	s/\(".*\)&uacute;\(.*">\)/\1%FA\2/g
	s/\(".*\)ú\(.*"\)/\1%FA\2/g
	s/\(".*\)&Aacute;\(.*">\)/\1%C1\2/g
	s/\(".*\)Á\(.*"\)/\1%C1\2/g
	s/\(".*\)&Eacute;\(.*">\)/\1%C9\2/g
	s/\(".*\)É\(.*"\)/\1%C9\2/g
	s/\(".*\)&Iacute;\(.*">\)/\1%CD\2/g
	s/\(".*\)Í\(.*"\)/\1%CD\2/g
	s/\(".*\)&Oacute;\(.*">\)/\1%D3\2/g
	s/\(".*\)Ó\(.*"\)/\1%D3\2/g
	s/\(".*\)&Uacute;\(.*">\)/\1%DA\2/g
	s/\(".*\)Ú\(.*"\)/\1%DA\2/g
	# every tilde on the matching line is encoded, wherever it appears
	s/~/%7E/g
}
# don't know how exactly this is happening, but it is
# (removes the stray space between "html" and the closing quote)
s/html ">/html">/g
# grotesquely ugly work-around for the troubles that apostrophes cause
# in my weblog scripts and m4 macros
# (the placeholders [apos], [lq] and [rq] are turned back into quote
# characters)
s/\[apos\]/'/g
s/\[lq\]/`/g
s/\[rq\]/'/g


reverse: print lines in reverse order

#!/usr/bin/awk -f
# reverse: print lines in reverse order
# Every input line is stored in an array; nothing is printed until the END
# rule runs, which walks the array from the last line back to the first.
    { line[NR] = $0 }
END { for (i = NR; i > 0; i--) print line[i] }

Of course, there's more than one way to skin a cat. In vi, the same operation is just eight keystrokes:

:g/^/m0

That's strictly interactive, of course. A script requires a little packaging, and explicit use of ex (the "vi" commands above are actually ex commands — the ":" at the beginning tells you that).

# Reverse the lines of each file named on the command line, in place.
# "$@" keeps every file name as a separate word.
for i in "$@"
do
    # The here-document body and its terminator start in column 0: a
    # terminator indented with spaces is not recognised by the shell.
    ex -s "$i" << 'end-of-script'
g/^/mo0
wq
end-of-script
done


exclusive_delete_between_two_lines.sed

The sed default is an inclusive delete, that is, delete all the text between two patterns including the lines containing the patterns. This is how to delete the text between two patterns without deleting the patterns.

#!/bin/sed -f
# Within each range that opens and closes on a line reading exactly XYZ,
# delete every line except the XYZ marker lines themselves.
/^XYZ$/,/^XYZ$/{
    /^XYZ$/!d
}


invert

Outputs lines from stdin in reverse order. What use that is exactly, I'm not sure. I must have written it for a reason, but...

#!/bin/sh
# invert: output lines in reverse order
# usage: invert [file ...]       (reads standard input when no file is given)
# "The first shall be last, and the last shall be first"
#
# Every line is stored in an array; output starts only after the last line
# has been read.
awk '
    { line[NR] = $0 }
END { for (i = NR; i > 0; i--) print line[i] }
' "$@"


[back to Scripts index] [back to Linux, Unix, etc] [Main Site] [Weblog]



Contents licensed under the GPL