#!/usr/local/bin/perl
#
# This is a perl script called "listdorker" for dorking your classlist.
# Put it in a file called "listdorker" and make it executable by typing
#
# chmod u+x listdorker
#
# Put the electronic classlist that you get from Peter Cook in a
# file called "rawlist".  (Clean out all the mail header stuff and
# any blank lines from the file.)  Use the script by typing
#
# listdorker rawlist > output_classlist
#
# Explanation of script:
#
# A typical line of a typical classlist from Peter Cook looks like
#
# 262  02 01  TSENG, PANG-TAT BILLY BOY    2   E    3.0    999-99-9998
# 262  02 01  MC LOUGHLIN, ERIN M          4   CHE  3.0    999-99-9999
#
# The script below will put the elements of this line in an array called
# @line and then extract the various parts.  You can easily modify the
# script to tailor the output to your needs.
#
# The first element of the @line array has index zero, and for the
# sample lines above, $line[0]="262".  The last element of @line has
# index given by $#line.  Hence, for the sample line, the social security
# number is in $line[$#line].  The student's major is in $line[$#line-2].
# The first field of the last name is in $line[3], etc.  I have to do
# something fancy to patch multiple last name fields together and to
# extract middle initials from the random number of names in fields 3
# through ($#line-4).  The script starts here:

# Main loop: turn each raw classlist record into $name, $ssnumber, $major
# and $year, then print it through the STDOUT format with "write".
#
# Those four variables are deliberately left as package globals (no "my"):
# the format is declared at file level and can only see them as globals.
# Everything else is a lexical temporary.
while (<>) {     # read in a line from the raw list file

    chomp;               # strip only a trailing newline; chop would eat the
                         # last SSN digit if the file lacks a final newline
    my @line = split;    # whitespace-separated fields of the record

    next unless @line;   # silently skip blank lines

    # Fields counted from the end of the record: year, major, credits, SSN.
    $ssnumber = $line[$#line];      # $#line is the last subscript of @line
    $major    = $line[$#line - 2];
    $year     = $line[$#line - 3];

    # Name fields occupy subscripts 3 .. $#line-4.  The last name may span
    # several fields; its final field is the first one containing a comma.
    # The search is bounded by the name range so that a line without a
    # comma (e.g. leftover mail header text) cannot loop forever.
    my $name_end = $#line - 4;
    my $comma_at = 3;
    $comma_at++ while $comma_at <= $name_end && $line[$comma_at] !~ /,/;
    if ($comma_at > $name_end) {
        warn "listdorker: skipping line " . $. .
             ": no comma-terminated last name found\n";
        next;
    }

    # Patch multi-field last names together with "_" and drop the comma.
    # Only a trailing comma is removed, never an ordinary character.
    my $lastname = join "_", @line[3 .. $comma_at];
    $lastname =~ s/,$//;

    # Reduce every given name after the last name to its initial(s).
    # The range is empty when the record has no given names, in which
    # case the name is printed as "LASTNAME,".
    my $middleIs = "";
    for my $i ($comma_at + 1 .. $name_end) {
        my $given = $line[$i];
        if ($given =~ /-/) {    # hyphenated name: PANG-TAT becomes P.-T.
            $given =~ s/([A-Z]).*-([A-Z]).*/$1.-$2./;
        } else {                # plain name: BILLY becomes B.
            $given =~ s/([A-Z]).*/$1./;
        }
        $middleIs .= $given;
    }

    $name = $lastname . "," . $middleIs;
    write;                      # emit the record using "format STDOUT"
}

# the format below for the write statement above is easy to modify:

# Output layout used by the "write" call in the main loop.  The picture
# line below holds a single left-justified field, so only $name is printed
# (cut off at the field width if it is longer).  The other values on the
# argument line are not printed until matching fields are added to the
# picture line.
format STDOUT =
@<<<<<<<<<<<<<<<<<<<<
$name,                    $ssnumber,       $major,  $year
.

# for example, replace the format statement above by this:
#
# format STDOUT =
# @<<<<<<<<<<<<<<<<<<<<     @<<<<<<<<<<<     @<<<<    @<
# $name,                    $ssnumber,       $major,  $year
# .
#
