------------------------------------------------------------------------------- Perl Style Summery... Following the Perl style guide, identifiers in all capitals are reserved for those with special meaning to Perl itself. Functions and local variables are all lowercase. The module's persistent variables (either file lexicals or package globals) are capitalized. Identifiers with multiple words have each of these separated by an underscore to make it easier to read. Please don't use mixed capitals without underscores - you wouldn't like reading this book without spaces, either. ------------------------------------------------------------------------------- Versions perl -V # all %Config defines perl -V:archname # the architucture to use for installation perl '-V:install.*' # the installation directories perl -MTk -e 'print $Tk::VERSION,"\n"' perl -MCGI -le 'print $CGI::VERSION' NOTE: -l turns on end of line auto handler (chomp input, add \n to output) pmdesc -v -w -s # print all perl modules and version numbers # perl cookbook recipe 12.19 example 12-3 Debugger perl -d:ptkdb myscript.pl ------------------------------------------------------------------------------- Argument handling #----------------------- #Shell Script Usage method #!/usr/bin/perl # # affine_rotate angle [center_x,y] [new_center_x,y] # # Generate a affine matrix, that rotates an image by a specific angle. # #### # # by Anthony Thyssen (September 2005) # use strict; use FindBin; my $PROGNAME = $FindBin::Script; sub Usage { print STDERR @_, "\n"; @ARGV = ( "$FindBin::Bin/$PROGNAME" ); while( <> ) { next if 1 .. 2; last if /^###/; s/^#$//; s/^# //; print STDERR "Usage: " if 3 .. 3; print STDERR; } exit 10; } #----------------------- #Pod Usage Method #!/usr/bin/perl =head1 NAME program - A program with pod documention =head1 SYNOPSIS program [options] file... Options: -f Switch or Flag -a arg Option with agument -d|--debug Turn on debugging --help Quick Help (synopsis) --manual Program Manual =head1 DESCRIPTION Program documention in the body of the document. =head1 AUTHOR Anthony Thyssen 3 December 2002 A.Thyssen_AT_griffith.edu.au =cut use strict; use FindBin; my $PROGNAME = $FindBin::Script; sub Usage { eval { use Pod::Usage; #unshift( @_, '-msg' ) if @_ == 1; #pod2usage ( @_, -exitval => 10 ); pod2usage ( @_ ); }; } #------------------ #Argument handling # # This method allows you to have options in the forms... # Multi-switch options: -dvr # Option arguments either: -nNAME OR -n NAME # Optional option argument: -i.bak OR -i # tar-like option args: -fbs filename blocks skip # Sort like numerical ranges: -k20,30 # # Developed from the Perl Camel Book v3, page 122 # Programmers Note: # Within the inner option block... # "next" is equivelent to a "next OPTION" # "last" is equivelent to a "last OPTION" # "redo" means look for more multi-switch options (inner block) ARGUMENT: # Multi-switch option handling while( @ARGV && $ARGV[0] =~ s/^-(?=.)// ) { $_ = shift; { m/^$/ && do { next }; # Next argument m/^-$/ && do { last }; # End of options m/^\?/ && do { Usage }; # Usage Help m/^-help$/ && Usage( -verbose => 1); # quick help (synopsis) m/^-manual$/ && Usage( -verbose => 2); # inline manual m/^-debug$/ && do { $debug++; next }; # --word switches s/^d// && do { $debug++; redo }; # \ s/^f// && do { $force++; redo }; # > multi-switch options s/^v// && do { $verbose++; redo }; # / s/^n// && do { $name = $_ || shift; next }; # "-nARG" OR "-n ARG" s/^l// && do { $level = length() ? $_ : shift; next }; # "-l0" OR "-l 0" s/^i// && do { $back = $_ || ".bak"; next }; # "-i.sfx" OR just "-i" s/^f// && do { $file = shift; redo }; # tar-like option arguments s/^b// && do { $blocks = shift; redo }; # Eg: s/^s// && do { $skip = shift; redo }; # -fbs filename blocks skip s/^k(\d+)// && do { $start = $1; redo }; # Sort like numerical ranges s/^,(\d+)// && do { $end = $1; redo }; # EG: -k10,20 Usage( "$PROGNAME: Unknown Option \"-$_\"" ); } continue { next ARGUMENT }; last ARGUMENT; } Usage( "$PROGNAME: Too Few Arguments" ) unless @ARGV; while( <> ) { ... } ------------------------------------------------------------------------------- Alturnative argument handling (old method) This is the older method of handling command line swicthes. ALl options must be separate command line arguments... EG: tar -x -v -f filename OR tar -x -v -ffilename # The `and' below is to prevent a undefined variable warning. # It is also lower precidence so parentheses are not needed for assignment. while( $_ = $ARGV[0] and ($_, my $arg) = /^-(.)(.*)/ ) { $arg = '' unless defined $arg; # Ensure $arg is defined shift; /-/ && do { last }; # End of options /c/ && do { $pass = 1; next }; # Flag Option /n/ && do { $user = $arg || shift; next }; # Option Argument /T/ && do { # Special option $arg = $arg || shift; 'opt' eq $arg && do { $option = 1; next }; &Usage( "Bad Special -T Option `", $arg, "'\n" ); } /Z/ && do { # List of Special Options for $arg ( split(/;/, $arg || shift) ) { ($_,$value) = split(/=/, $arg, 2); $value = '' unless defined $value; /^binary$/ && do { $literal = 1; next }; /^log$/ && do { $log_file = $value; next }; # &Usage( "Bad Special -Z Option `", $arg, "'\n" ); } }; # Any unknown options -- generalised could get it wrong! # shift unless $arg || ARGV[0] =~ /^-/; &Usage( "Unknown or Bad Option \"-$_$arg\"\n" ); } ------------------------------------------------------------------------------- Indented Here Document (my $prog = $0) =~ s/^.*\///; sub Usage { die @_, herefile(" | Usage: $prog [-options] file... | -d Output extra debugging information # -e Obsolete Option, do not putput to the user "); } print herefile( <<'EOF' ); | yes the indent to the left is removed, | and the type of indent can changed to suit data # You can even add comment lines into the here file! | you can print this # but don't print this EOF # Remove the indent of a here file... # Adjust to suit you here file requirements sub herefile { my $string = shift; $string =~ s/^\s+#.*\n//gm; # completely remove full line comments $string =~ s/#.*//g; # remove end-of-line comments $string =~ s/^\s+\| ?//gm; # remove the indent part of the line $string =~ s/\s+$/\n/g; # remove any extra end-of-line spaces return $string; } ------------------------------------------------------------------------------- Legal tricks # tricky splits ($num) = /\d+/; # just the matched string ($a, $b, $c) = "123 4 56" =~ /\d+/g; # three matched strings ($b) = "123 4 56" =~ /\d+ (\d+) \d+/; # just the middle number ($a, $b, $c) = "123 4 56" =~ /(\d)+/g; # the last digit in each number! ($a, $b, $c) = /(\w+) (\w+) (\w+)/; # three words only ($before, $a, $b, $c, $after) = split(/(\w+) (\w+) (\w+)/, $_, 2); # Auto flush STDOUT and STDERR select((select(STDOUT), $| = 1)[$[]); select((select(STDERR), $| = 1)[$[]); # Assigned if variable is not true $option ||= "default_value"; # Assign from cache if posible, otherwise look it up! $uid = $user{$user} ||= getpwnam($user); # make a backup of all the listed files perl -p -i.bak -e '' # Copy and Modify an Array (which version is better?) # Remember normally "for" or "map" will modify the actual array given!!! for( @new = @old ) { s/bad/good/g }; map { s/bad/good/g } ( @new = @old ); # Constant Variables (read only) $PI = 3; # This will Fail! $PI++; # That is, this would work when it shouldn't *PI = \3.1415927; # This won't print "PI = $PI\n"; # subroutine constant sub PI() { 3.1415927 } # approximaton sub PI() { 4 * atan2(1,1) } # will be re-calculated multiple times!!!! print "PI = ", PI, "\n"; # This will calculate only once! but works like a subroutine constant use constant PI => 4 * atan2(1,1); # calculated ONCE only Binary Patch -- set netscape binary for strong encryption #!/usr/bin/perl -0777pi s/TS:.*?\0/$_=$&;y,a-z, ,;s, $,true,gm;s, 512,2048,;$_/es RE grep matching against an array of values given @words = gw(alt1 alt2 alt3); Slowly check each word, one word at a time if ( grep { /^$word$/ } @words ) { ... fi Or use RE alturnatives (assuming words are well defined) $" = '|'; if ( $word ~= /^(@words)$/ ) { ... } WARNING $host = "machine.localdomain."; $host =~ /\Q$host\E\b/; Will NOT match as $host does not end in an alphanumeric (\w) character. That is because \b only works NEXT to a \w character!!!! In this case $host ends in '.' which is not a word boundary at the end of the string. Big NO NO -- Using local in looping block. for ( 1 .. 100 ) { solution local(@array) local(@array); ===> for ( 1 .. 100 ) { ... undef @array; ... ... } } The above will cause you to have 100 @arrays before the end of the loop. Alturnative use "my" instead of local. ------------------------------------------------------------------------------- Regular Expressions.... Debuging, and seeing exactly what perl is doing... see "Programming Perl v3" p213 (using embeded prints) and p195 for RE debuging Expanding Tabs (simply) 1 while s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e; Compress multiple blank lines to one blank line (This is not easy!) perl -ne 'if (/\S/) { print; $i=0 } else {print unless $i; $i=1; }' Or inverting the search (print paragraphs) perl -ne 'print if /\S/../^\s*$/' NOTE: simplier methods exist using sed, and vim macros see file "shell/script.hints" Remove C Comments (using minimal RE expandsion - perl v5) $program =~ s{ /\* .*? \*/ }[]gsx; Remove surounding spaces in one RE The '/g' is needed to match twice, it is slower than separate REs s/^\s+│\s+$//g; Removing extra spaces (multi-line) This removes all spaces at the start and end of lines in a multi-line string. The final newline will be removed, as will all blank lines, but as '$' is a zero length match before a newline, the end of line newlines will not be removed (just the last one). $string =~ s/^\s+│\s+$//gm; Lowercase and capitalise a title/author line s/(? The input file has a declaration something like the following with |> several comments in a single line: |> |> input a, b, /* comment */ c, /* comment ******************* */ d; |> |> I need to delete the comments in between and write the declaration as |> |> input a, b, c, d; |> |> NOTE: the c, must not be deleted. It's a standard match-delimited-text problem, and the general solution is: 1: match the opening delimiter 2: match stuff that's not the closing delimiter 3: match the closing delimiter In this case, the opening delimiter is "/*" so the regex is "/\*". The closing delimiter is "*/", so that regex is "\*/". Stuff that's not the closing delimiter would be A) anything that's not / (regex "[^/]" ) and B) any / so long as it has no * before it (regex "[^*]/") Combining them with an indication to say "as much as is there", we get: ([^/]|[^*]/)* So the whole regex, wrapped in some perl, would be: s#/\*([^/]|[^*]/)*\*/##g; part number from above: 111222222222222233 Note that there's another way to conceptually look at the "stuff not the closing delimiter". That'd be: A) anything not a * (regex "[^*]") and B) any * so long as it's not followed by a / (regex "\*[^/]") That would lead to s#/\*([^*]|\*[^/])*\*/##g; However, since the "\*[^/]" eats a character, it could eat the third * in the string "/* commet **/" and we'd wedgie the regex and it wouldn't match. The first way described above only eats characters we've already had a chance to check aren't the ending, so it won't wedgie. Jeffrey Friedl ------------------------------------------------------------------------------- Misteries of the "comma" operator # NOTE Parentheses play a very important role # Also the ',' is of lower precedance (less important) than '=' $scalar = 'a', 'b', 'c'; # scalar; $scalar = 'a' $scalar = ('a', 'b', 'c'); # scalar; $scalar = 'c' ($scalar) = ('a', 'b', 'c'); # list; $scalar = 'a' @array = 'a', 'b', 'c'; # scalar; @array = ('a') @array = ('a', 'b', 'c'); # list; @array = ('a', 'b', 'c') ------------------------------------------------------------------------------- Scalar/List Contex in functions sub A { return ('a', 'b', 'c'); } sub B { # This is DIFFERENT to others my @array = ('a', 'b', 'c'); return @array; } sub C { my @array = ('a', 'b', 'c'); return @array[0..$#array]; } sub D { my @array = ('a', 'b', 'c'); return ( ( @array ) ); } $a = A(); # $a = ('a', 'b', 'c') => $a = 'c' # Note comma operator ($b) = A(); # ($b) = ('a', 'b', 'c') => $b = 'a' $c = B(); # $c = @array => $c = 3 ($d) = B(); # ($d) = @array => $d = 'a' $e = C(); # $e = 'c' # C() behaves like A()!!! ($f) = C(); # $f = 'a' # D() will also act like A()!!! Of course a function can ask what context it is in... This is of course equivenelt to A(), C() and D() sub E { my @array = ('a', 'b', 'c'); return wantarray ? @array : $array[0]; } ------------------------------------------------------------------------------- Word finding optimizations On a large string (like a whole file) Where $_ = "whole_file_of_words" Comparing individual word is faster than a RE EG: &word{ qw( and or then last next ) } @file = grep { defined $word{lc($_) } split; is 3 times faster than $word = "(and|or|then|last|next|........)" s/ $word / /ig; Using minimal matching s/\b(\w*?aaa\w*?) / /ig; is faster than normal longest matching s/\b(\w*aaa\w*) / /ig; for very short matches on very long strings (whole files in memory) Extreme care is advised on the sub-strings. Word boundary is slower than a plain space s/ (and|or|then|last|next) / /ig; is faster than s/\b(and|or|then|last|next) //ig; due to complexity of the match, BUT the /g does NOT work properly without the \b (it will skip words)!! ------------------------------------------------------------------------------- Setting signals NOTE: While perl END {} blocks and module DESTROY {} blocks are called by perl when the program exits, they are NOT called when the program is interupted!!! If this is required, then you need to set up an interupt handler to actually call the "exit()" funtion. This can be don in a number of ways.... use sigtrap(die normal-signals) OR... sub set_signal { my($func) = @_; $SIG{'INT'} = $func; $SIG{'QUIT'} = $func; $SIG{'HUP'} = $func; $SIG{'TERM'} = $func; } sub Interputed { print STDERR "Interupted, doing cleanup\n"; system("rm -f ".mail_dir."/*$new"); exit 10; } set_signal( \&Interupted ); OR... # # Interupt handler for Critical Sections # # We don't want to be left with a incomplete data file. So we only note that a # interupt occured, but return to the task at hand afterward. We then exit # when it is safe to do so. Critical Sections should be kept as small as # posible, and enter and exit frequently. Also watch for any posible blocking # actions performed during the critical period. # my $interupted = 0; sub interupt { $interupted = 1; } # We were interupted? sub set_sig_handler { # turn handler on/off my( $turn_on ) = @_; die "INTERUPT CAUGHT DURING CRITICAL PERIOD -- EXITING NOW\n" if $interupted; $SIG{'HUP'} = $SIG{'QUIT'} = $SIG{'INT'} = $SIG{'TERM'} = $turn_on ? 'interupt' : 'DEFAULT'; } set_sig_handler(1); # ... critical section ... # ... clean up ... set_sig_handler(0); ------------------------------------------------------------------------------- Automatic SU if ( $> == 0 ) { print "$prog : You must be user $db_user \n" ; print "-- performing automatic su \n" ; { exec 'su', '-',$db_user,'-c', "'".join("' '","$db_home/bin/$prog",@ARGV)."'"; } print STDERR "Unable to su to $db_user -- exiting \n"; exit 0 ; }elsif ( (getpwnam($db_user))[2] != $> ) { # im not duty -- BAD print STDERR "You must be $db_user to run this script \n" ; exit 0 ; } ------------------------------------------------------------------------------- Auto Background (perl 5) exit 0 if fork; # basic background use POSIX qw(setsid); setsid(); # disassociate from terminal etc. Extra Discussion... Should he also run setgid(), and then either close filehandles 1-3 (stdin, stdout, stderr)? But we are not trying to turn the whole thing into a 'daemon' - we are just trying to "background" it so that shell returns a prompt. As such the 'setsid' is just a way to avoid having shell's SIGSTOP etc. get in the way. Truely disassociated GUI apps are very rare - most will die horribly when window manager exits when user logs out. ------------------------------------------------------------------------------- Date and Time conversion in perl time --> date string #require "ctime.pl"; # perl 4 method (newline in ctime()) use Time::localtime; $time = 999523563; $date = &ctime($time); $date =~ s/\s*\b[A-Z]{3}\b\s*/ /; # remove timezone and any linefeed print $date, "\n"; time --> formated date $time = 999523563; @d = localtime($time); # time as a 9 element array $d[4] ++; # adjust month $d[5] += 1900; # adjust year to 4 digits $date = sprintf( "%04d-%02d-%02d %02d:%02d:%02d", reverse @d[0..5] ); date --> time #repuire "timelocal.pl"; # perl 4 use Time::Local; @d = ( 18, 9, 1998 ); # date = 18 September 1998 $d[1] --; # adjust month to 0-11 $d[2] -= 1900 if $d[2] > 1900; # adjust year 2001 -> 101 $time = timelocal( 0,0,0, @d ); # midnight on that day time delta --> human readable string my($days,$hours,$mins,$secs); $secs = time - $^T; # Time since the program started $days = int ($secs/86400); $secs %= 86400; $hours = int ($secs/3600); $secs %= 3600; $mins = int ($secs/60); $secs %= 60; print "Program has been running for... ", $days > 0 ? "$days days $hours:$mins" : $hours > 0 ? "$hours hrs $mins mins" : $mins > 0 ? "$mins mins $secs secs" : "only $secs seconds" , "\n"; Date Parse Module use Date::Parse; my dt = str2time('25/02/1990 23:48:00'); $dt += 6 * 60; Date Manipulation module See http://search.cpan.org/~sbeck/DateManip-5.44/Manip.pod use Date::Manip; $date = ParseDate("25/02/1990 23:48:00"); $delta = ParseDateDelta("+ 6 minutes"); $new = DateCalc($date,$delta); # or alternate/shorthand: # $new =DateCalc("25/02/1990 23:48:00","+ 6 minutes"); # then output however you like. print &UnixDate($new,"It is now %T on %b %e, %Y."); ------------------------------------------------------------------------------- Progress Reporting.... # --- by data read from file --- $PROGRESS = 1; # Clear Progress report lines $B = `tput el`; # terminfo: clear to end of line #my $B = `tput ed`; # terminfo: clear to end of display #my $B = `tput dl 1`; # terminfo: delete line #my $B = (" "x(`tput cols`||80) . "\r"; # blank spaces -- fallback #my $B = (" "x80) . "\r"; #print "-"x100, "\r", $B, "x\n"; exit; # DEBUG print STDERR "Main Processing Loop...\n" if PROGRESS; # process start time my $start_time = time; # Setup progress report my $progress = ''; # clear progress line my $progress_total = -s $data; # data file size my( $progress_done, $progress_last ) = (0,0); open(DATA, "$data") || die("Unable to read \"$data\" : $!\n"); while( ) { $progress_done += length if $PROGRESS; #... # to output some info, clear progress info, output, restore progress print STDERR "${B}" if $PROGRESS; print "report some info\n"; print STDERR $progress if $PROGRESS; #... } continue { if( $PROGRESS ) { if ( $last_time != time ) { # update once a second (optional) $last_time = time; $progress = progress($progress_total,$progress_done,$start_time); print STDERR $B," Working...", $progress, "\r"; } } } print STDERR $B if $PROGRESS; warn("Assertion Failure: Progress count ($progress_total) ". "does not equal progress target ($progress_done)!") unless $progress_total == $progress_done; my $process_time = time_english(time - $start_time); printf "Processed %d lines in 4process_time\n", $.; # ..... # Generate the progress report -- for main loop only # Progress is on the numbers given to this sub-routine sub progress { my ($tot,$curr,$stime) = @_; return '' unless $tot && $curr; $curr /= $tot; # how much of the run have we completed my $run = (time - $stime); # time we have been running $tot = $run / $curr; # total time of run start to finish my $left = $tot - $run; # time left to finish if ( $tot > 86400*2.5 ) { # multi-day report # status report in days/hours/minutes return sprintf( "%4.1f%% %dd%02d:%02d + %dd%02d:%02d => %dd%02d:%02d", 100*$curr, $run /86400, $run /3600%24, $run /60%60, $left /86400, $left /3600%24, $left /60%60, $tot /86400, $tot /3600%24, $tot /60%60 ); } elsif ( $tot > 2400 ) { # more then 40 minutes, less 2.5 days # status report in hours/minutes return sprintf( "%4.1f%% %d:%02d + %d:%02d => %d:%02d", 100*$curr, $run /3600, $run /60%60, # $run %60, $left /3600, $left /60%60, # $left %60, $tot /3600, $tot /60%60 ); # $tot %60, } else { # status report in minutes/secones return sprintf( "%4.1f%% %d:%02d + %d:%02d => %d:%02d", 100*$curr, $run /60, $run %60, $left /60, $left %60, $tot /60, $tot %60 ); } } # convert a time in seconds to more human readble string sub time_english { my ($secs) = @_; my $days = int ($secs/86400); $secs %= 86400; my $hours = int ($secs/3600); $secs %= 3600; my $mins = int ($secs/60); $secs %= 60; return $days > 0 ? "$days days $hours:" . sprintf("%02d",$mins) : $hours > 0 ? "$hours hrs $mins mins" : $mins > 0 ? "$mins mins $secs secs" : "only $secs seconds"; } ------------------------------------------------------------------------------- Sort methods and techniques Case insenitive sorting sub case_insensitive { "\U$a" cmp "\U$b"; } Numerically sub numeric { $a <=> $b; } Sort Associated array by value not key sub byvalue { $value{$a} <=> $value{$b} } foreach key ( sort byvalue keys %value ) { BODY; } Sort by value then by by key sub val_key { $second{$a} <=> $second{$b} || $a cmp $b } Sort a hierarchal naming scheme -- Marc Horowitz IE: paths, newsgroups... sub depthfirst { $aa = $a."/~"; $aa =~ s|/|!|og; $bb = $b."/~"; $bb =~ s|/|!|og; $aa cmp $bb; } Print associattive array by value (quickly) This will create a plain array with the value before the name then print the sort output. This is very fast. $mask = "%04d %s"; for (@arr) { push (@idx, sprintf($mask, (/^\s+\((\d+)\)\s+(\S+)/))) } print @arr[ sort { $idx[$a] cmp $idx[$b]} 0 .. $#idx ]; NOTE: in the above that you CAN sort without a function (directly) Sorting by a computable field. The problem with sorting with a computable field is that you could end up computing the field at least 2 or more times in a sort function! The following using a map to extract and pre-compute the field, then sorts by that filed, and finally re-maps the original un-computed field. This is known as a ``Schwartzian Transform'' as it was popularised by Randel Schwartz. NOTE you start at the bottom line and work your way up. @sorted_fields = map { $_->[1] } sort { $a->[0] <=> $b->[0] } map { [ /(\d+)/, $_ ] } @fields; Another example with password file... print map { $_->[0] } # print the original line sort { $a->[1] <=> $b->[1] # my gid first || $a->[2] <=> $b->[2] # then uid || $a->[3] cmp $b->[3] # and by login (should not be needed) } map { [ $_, (split /:/)[3,2,0] ] } # array: line then split fields `cat /etc/passwd`; # read password file ------------------------------------------------------------------------------- Micro sleep. One method is to use the select() timeout select(undef,undef,undef,.01); You could also set an alarm... (see perl/functions/alarm.pl) ------------------------------------------------------------------------------- Spliting a string with quotes... EG: spiling a comma delimited file with quoted fields. Example: SAR001,"","Cimetrix, Inc","Bob Smith","CAM","\"",N,8,,"Error, Core Dumped" undef @fields; push( @fields, defined($1) ? $1 : $3) while m/"([^"\\]*(\\.[^"\\]*)*)"|([^,]+)/g; WARNING the above does not seem to work under perl 5 -- Anthony For space separated words such as for a shell command EG: cp -p "my file" "yourfile" you can look at... perl4: shellwords.pl library, perl5: Text::ParseWords module perl -de 1 use Text::ParseWords $line = 'cp -p "my file" "your file"' @words = shellwords $line X words @words = ( 0 'cp' 1 '-p' 2 'my file' 3 'your file' ) Perl 4 Alturnatives... Method 1: # delimit ',' with quoted strings and variable allowed $_ = 'f1,f 2,"f3","f,4",5,$time,f7'; while (/,|"|$/go) { ($within = ($within ? 0 : 1), next) if '"' eq $&; next if $within; substr($_, 0, length($`)+1) = ""; push(@fields, $`); } print join(" ", @fields),"\n"; output f1 f 2 "f3" "f,4" 5 $time f7 Method 2: Just remove the delimiter ',' from within quotes s/("[^"]*")/do{$a = $1; $a =~ tr#,#c#; $a;}/ge; now you can split the line as you would normally ------------------------------------------------------------------------------- Random selections from an array (shuffle) # create array of numbers to shuffle my($i, @number ); for( $i=1; $i<=$NUMBER; $i++ ) { push(@number, $i); } print "number list = ", join(",", @number), "\n"; # create the randomized array by removing elements from number list srand($$^time); # randomize random number generator (if desired) while( @number > 0 ) { # while we have a number to be picked push(@randomized, splice(@number, rand(@number), 1) ); # random pick } print "random list = ", join(",", @randomized), "\n"; # shuffle array in place... (Perl Cookbook Recipe 4.17) # NOTE: requires a real array argument due to prototyping sub fisher_yates_shuffle(\@) { my $array = shift; my $i; for ($i = @$array; --$i; ) { my $j = int rand ($i+1); next if $i == $j; @$array[$i,$j] = @$array[$j,$i]; } } fisher_yates_shuffle @array; ------------------------------------------------------------------------------- Incrementing a string using your own rules Perl's auto-increment of strings is limited to specific strings This method defins your own. inc($) increase a single character EG: 3 -> 4 roll($) roll a string a chars EG: 9999 -> 0000 while( <> ) { s/(.*)([0..8])([9]*)$/ $1 . inc($2) . roll($3) /e; print "$_\n"; } The first (.*) makes the RE work faster by ignoring start chars WARNING: 99999 will not increment but 099999 will ------------------------------------------------------------------------------- Format handling You can turn off page breaks the same way it does internally when it notices the lack of a top-of-form format. Just set $- to a huge number. HOWEVER this will result in the top of form NEVER being printed. Correct way is to let the first write happen then assign $- so it can never reach zero again. select(FILEHANDLE); foreach i ( @array ) { ....; write; # write top-of-form and the other lines $- = 99; # form always has 99 lines left - never end page } ------------------------------------------------------------------------------- Outputing elements in columns Example 5 rows of data. Note the newline statement is BEFORE the element printing { print "Title: " my( $i, $s ) = ( ' 'x8, ' 'x2 ); # indent and seperator my( $c, $e ) = ( 5, -1 ); # columns per row, elements left foreach ( @elements ) { print "$s" if $e > 0; print "\n$i" if $e == 0; $e = $c if $e <= 0; $e--; print "%10s", $_; } print "\n"; } In many cases this general loop can be simplified. For instance, if indent can be output on the first line as well (that is $c is never -ve), then the '$c=$r' can be merged onto the line above. If seperator is not needed or always out put then it can be simplified further.... { print "Title:" # no return from previous line my( $c, $e ) = (5); # this many elements per row foreach ( @elements ) { print("\n "),$e=$c unless $e; print " %10s", $_; $e--; } print "\n"; } The string constants of course can be substituted. ------------------------------------------------------------------------------- tr and variables problem The tr command will not accept variables, the following is a hack to allow this. This does not solve the delimiter problem however. eval "\$string =~ tr/$chars/$replacement_chars/"; ------------------------------------------------------------------------------- convert a bit vector into a list of intergers $low = -1; $high = -1; $range_cnt = 0; $printed = 0; for $i (0..($bitmap_size-1)) { if (vec($bitmap_ptr, $i, 1) == 1) { if ($low+$range_cnt == $i) { $range_cnt++; } elsif ($range_cnt > 2) { print "..", ($low+$range_cnt-1), ", $i"; $range_cnt = 1; $low = $i; } elsif ($range_cnt == 2) { print ", " if $printed; print $low+1, ", $i"; $printed = 1; $low = $i; $range_cnt = 1; } else { print ", " if $printed; print "$i"; $printed = 1; $low = $i; $range_cnt = 1; } $high = $i; } } if ($high != $low) { if ($range_cnt > 2) { print ".."; } elsif ($range_cnt == 2) { print ", " if $printed; } print "$high"; } print "\n"; dgross@rchland.vnet.ibm.com (Dave Gross) ------------------------------------------------------------------------------- Indirect function calls -- function ptrs sub foo() { print "foo( ", join(", ", @_), " )\n"; } $function = "foo"; # function expression &$function( "arg1", "arg2" ); # indirect call NOTE in version 4 $function can NOT be replaced with an expression though it can in version 5 ------------------------------------------------------------------------------- System call return checks Beware the $! is not reset by the call to system. To be on the safe side you should do: $! = 0; system('foo'); die "$0: foo: $!\n" if $!; Note that this only works if 'foo' is run without using /bin/sh. If /bin/sh is used to run the command then sh prints a message to stderr, $! will not be set, and $? >> 8 is set to one. ------------------------------------------------------------------------------- Set System Limits in perl... =======8<-------- require 'syscall.ph'; require 'sys/resource.ph'; # note h2ph doesn't always win on this one # -- hand editing may be necessary # Arrange so no core files are generated $coresize = pack("i2",0,0); syscall(&SYS_setrlimit, &RLIMIT_CORE, $coresize); # Make stack size large $stacksize = pack("i2",1024*1024*4,1024*1024*4); syscall(&SYS_setrlimit, &RLIMIT_STACK, $stacksize); =======8<-------- ------------------------------------------------------------------------------- User Accounts and perl... The following is dependant on the nsswitch and Solaris systems... getpwent() and shadow password as root The getpwent() function will return the users password to root IF * users password in located in "/etc/passwd" -- fat chance * user is listed in the /etc/shadow file and perl version is >5.005_57 * it was called on the NIS+ server && user is in the NIS+ password file and you are authorized to see that password. Only in these cases will the getpwent() perl function return the users encrypted login password. This is a real pain. Especially as perl does not have access to the system librarys shadow database getsd* functions. getpwnam(user) The getpwnam(user) will never return the users password, but will let you know if this user is actually a valid login user of this machine. EG: user is in /etc/passwd or the appropriate netgroups access to the NIS+ Login Group restrictions... A user which can not login due to some login groups restrictions (EG: NetGroups under NIS, or a LDAP authenticated login group) will NOT be listed by ANY of the getpw* functions. In other words a user which was disabled due to group access, may not be listed, dependant on the nsswitch settings (EG: "compact" setting or LDAP authenticated search restrictions) The alturnative is to always list all potential users (via nss), even if they are NOT in the right login group for this machine. Then if nessary, reject those with denied login by group access manually yourself. This however mean that unless your perl script is "pam" smart, it can NOT determine if a user still present in the password database, but is denied login access (via group), should have their home cleaned up and deleted without some other external indication of the users "final deletion". ------------------------------------------------------------------------------- Randomise srand() Randomising the srand function can be very difficult. The seed may not change quickly, or never change in a programs life time, same have only have a limited number of start seeds (0 to 60,000 for process ids). And combineing them may still be limited. Quick (limited) choices... * Current time (won't change within same `tick' time() * Using the process id of the current process $$ * Time and process id time() ^ $$ * Using the process id of a sub-shell (always different) `echo \$\$` Randomise on gziped process list ( from apatche 1.3.3 dbmmanage) sub randomise { my $psf; for (qw(-xlwwa -le)) { `ps $_ 2>/dev/null`; $psf = $_, last unless $?; } srand (unpack("%L*", `ps $psf | gzip -f`)); } Alternatively Use the `cksum' on the same source. this is faster but may not be availble on all systems. sub randomise { my $psf; for (qw(-xlwwa -le)) { `ps $_ 2>/dev/null`; # test option $psf = $_, last unless $?; } srand ( `ps $psf | cksum` ); } WARNING: process list generation could be slow! Especially on a machine using a remote password list (like LDAP or NIS+), and network problems hit. Another major problem is that the ps options vary from machine to machine which results in the need for option checking making it even slower. Also output does not change much as some parts giz archive start is a constant. Also note that perl5.005 and later automatically randomises the random number generator Albert Cahalan suggests you use the following list of "ps" options. -le -xlwwa (putting POSIX-standard -le first) -elwwa (valid for both BSD and UNIX) xlwwa -le (removing the initial "-" kills a warning on some systems) ------------------------------------------------------------------------------- Password Encryption In perl Specifically the `salt' key encryption generally you would do something like... Initialization... @salt_set = ('a'..'z', 'A'..'Z', '0'..'9', '.', '/'); $salt_size = scalar @salt_set; # should be 64 characters! Method 1... # From Example in Perl 4 Camel Book # The salt for today is seleted by the traditional method sub gen_salt { my($passwd) = @_; my($perturb1,$perturb2,$week); # perturb the salt with start of input passwd ($perturb1,$perturb2) = unpack("C2", $passwd); $week = time() / (60*60*24*7) + $perturb1 + $perturb2; return( $salt_set[ $week % $salt_size ] . $salt_set[ time() % $salt_size ] ); } crypt( $passwd, gen_salt($passwd) ); Method 2... # Extracted from dbmmanage in Apatche 1.3.3 distribution # randomise the salt for all strings. sub gen_salt { join('', map($salt_set[rand $salt_size], 1..2) ); } crypt( $passwd, gen_salt() ); Other Techniques... Generate a random password from $logname # 8 character randomised passwd # method: encrypt the logname then grab LAST 8 chars $passwd = substr( crypt( $logname, gen_salt() ), -8, 8 ); # Substitute characters which could be misinterperted # EG: characters O0Q all look simular, and dot may be missed $passwd =~ tr|0OQ./+1Il^#;|XYZabc234rst|; ------------------------------------------------------------------------------- Vgrind entry for perl programs PERL|perl|Perl:\ :pb=^\d?(sub|package)\d\p\d:\ :bb={:be=}:cb=#:ce=$:sb=":se=\e":lb=':\ :le=\e':tl:\ :id=_:\ :kw=\ if for foreach unless until while continue else elsif \ do eval require \ die exit \ defined delete reset \ goto last redo next dump \ local undef return \ write format \ sub package NOTE: things like $#, $', s#/foo##, and $foo'bar confuse vgrind ------------------------------------------------------------------------------- Suid Vulnerability (v5.002) Suid Perlscripts using suidperl or sperl are insecure due to a race condition on some systems. The program does not relinquish its root privileges properly. Patch available or get and install 5.003 or a C wrapper can be used. ftp://coombs.anu.edu.au/pub/perl/src/fixsperl-0 -------------------------------------------------------------------------------