##
## Copyright 2010
## The Regents of the University of California
## All Rights Reserved
##
## Permission to use, copy, modify and distribute any part of this
## software package for educational, research and non-profit
## purposes, without fee, and without a written agreement is hereby
## granted, provided that the above copyright notice, this paragraph
## and the following paragraphs appear in all copies.
##
## IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY
## PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL
## DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS
## SOFTWARE, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF
## THE POSSIBILITY OF SUCH DAMAGE.
##
## THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE
## UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE,
## SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. THE UNIVERSITY
## OF CALIFORNIA MAKES NO REPRESENTATIONS AND EXTENDS NO WARRANTIES
## OF ANY KIND, EITHER IMPLIED OR EXPRESS, INCLUDING, BUT NOT LIMITED
## TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
## PARTICULAR PURPOSE, OR THAT THE USE OF THE SOFTWARE WILL NOT INFRINGE
## ANY PATENT, TRADEMARK OR OTHER RIGHTS.
##
##
package tool_md5;

use strict;
use warnings;

=head1 NAME: tool_md5

=head2 PURPOSE:

In many directories checksums for all files are stored either
in a single file with name md5.md5, or for each file <file> a file
<file>.md5 exists containing the checksum for <file>.

The procedure tool_md5::check in this script confirms file integrity
by recalculating the md5 checksum and comparing it against the
checksum stored alongside the file.

=head2 MODIFICATION HISTORY:

    MAR-2011, Paul Hick (UCSD/CAIDA; pphick@caida.org)

=cut

=head1 FUNCTION: check

=head2 CALLING SEQUENCE:

    $status = check( $f, $md5_substitute, $add_md5, $new_md5dotmd5,
                     $verbose, $md5util );

=head2 INPUTS:

    $f               fully-qualified file name of file to be checked
    $md5_substitute  if set, this value is used as the 'computed'
                     checksum (the checksum is not calculated from
                     the file content)
    $add_md5         if = 1, and no checksum is stored yet, then the
                     checksum is added to md5.md5 in the same directory.
                     Unless $new_md5dotmd5 is also set, md5.md5 must
                     exist and be writeable.
                     if = 2, then in addition an existing, but wrong
                     checksum is removed first, before the new
                     checksum is added.
    $new_md5dotmd5   If $add_md5 is set, an attempt is made to
                     create md5.md5 if it does not exist already.
                     Ignored if $add_md5 is not set.
    $verbose         by default messages are printed only for a
                     failed checksum test. With $verbose set a
                     message is always printed
    $md5util         if set, an external command that reads the file
                     on standard input and prints its checksum; used
                     instead of the Digest::MD5 module

=head2 OUTPUTS:

    $status          TRUE (1) if the checksum matched, or was added or
                     replaced; FALSE (0) if the checksum did not match
                     (and was not replaced), or if no stored checksum
                     was found and $add_md5 is not set.

=head2 PROCEDURE:

This function does not itself test whether $f is a regular, readable
file, nor does it skip files with extension .md5; callers are expected
to make that selection.

Unless $md5util or $md5_substitute is given, checksums are computed
using the Digest::MD5 module.

=head2 MODIFICATION HISTORY:

    MAR-2011, Paul Hick (UCSD/CAIDA)
    SEP-2011, Paul Hick (UCSD/CAIDA; pphick@caida.org)
        Added extra argument $md5_substitute to function 'check'
        providing a 'computed' md5 checksum (bypassing the internal
        checksum computation)

=cut

use File::Basename qw(dirname basename);
use File::Spec;
use Digest::MD5;

use constant {
    FALSE => 0,
    TRUE  => 1,
};

sub check {
    my ($f, $md5_substitute, $add_md5, $new_md5dotmd5, $verbose, $md5util) = @_;

    # A missing $add_md5 means "do not add"; normalise it so the numeric
    # comparison further down does not warn about an uninitialized value.
    $add_md5 ||= 0;

    my $status = TRUE;

    # Get the checksum to compare against.
    # Should be in file md5.md5 or file $f.md5
    my $md5_old = get_stored_md5( $f );

    my $type = $md5_substitute ? 'substitute' : 'computed';

    # Only determine the new checksum when it is going to be used.
    # The declaration is kept separate from the condition because
    # 'my $x = ... if COND' has undefined behaviour (see perlsyn).
    my $md5_new;
    if ( $md5_old or $add_md5 ) {
        $md5_new = $md5_substitute ? $md5_substitute : compute_md5( $f, $md5util );
    }

    if ( $md5_old ) {
        # Check whether checksums are the same
        my $same_md5 = defined $md5_new && $md5_new eq $md5_old;

        if ( !$same_md5 ) {
            # A failed test is always reported, verbose or not.
            $status = FALSE;
            print failed_checksum_message( $f, $md5_new, $md5_old, $type );

            if ( $add_md5 > 1 ) {
                # Replace the wrong entry in md5.md5 with the new checksum
                remove_stored_md5( $f, $verbose );
                set_stored_md5( $md5_new, $f, undef, $verbose );
                $status = TRUE;
            }
        }
        elsif ( $verbose ) {
            print "'$f', ok\n";
        }
    }
    else {
        # No checksum to compare against
        print "'$f', no checksum to compare against ... "
            . ( $add_md5 ? "add": "skip" ) . "\n"
            if $verbose || ! $add_md5;

        if ( $add_md5 ) {
            # Add checksum to md5.md5 file
            set_stored_md5( $md5_new, $f, $new_md5dotmd5, $verbose );
        }
        else {
            $status = FALSE;
        }
    }

    return $status;
}

=head1 FUNCTION: compute_md5

=head2 PURPOSE:

Compute the md5 checksum from the content of a file.

=head2 CALLING SEQUENCE:

    $md5 = compute_md5( $file, $md5util );

=head2 INPUTS:

    $file       scalar; file name
    $md5util    optional; external command that reads the file from
                standard input and prints the checksum. If not set
                the Digest::MD5 module is used.

=head2 OUTPUTS:

    $md5        scalar; the md5 checksum as a hex string
                undef if the output of $md5util could not be parsed

=head2 PROCEDURE:

Dies if the file cannot be opened for reading by Digest::MD5.
When $md5util is used the file name is single-quoted for the shell,
so names with spaces or shell metacharacters are passed safely.

=cut

sub compute_md5 {
    my ($file, $md5util) = @_;

    if ( $md5util ) {
        # Quote the file name for the shell: wrap it in single quotes and
        # escape any embedded single quote as '\''.
        ( my $quoted = $file ) =~ s/'/'\\''/g;

        my $output = `$md5util < '$quoted'`;
        $output = '' unless defined $output;    # command could not be run
        chomp $output;

        return extract_md5( $output, basename($file) );
    }

    open( my $fh, '<', $file ) or die "Can not open '$file': $!";
    binmode($fh);
    my $md5 = Digest::MD5->new->addfile($fh)->hexdigest;
    close($fh);

    return $md5;
}

=head1 FUNCTION: get_md5

=head2 PURPOSE:

Get the md5 checksum for a file

=head2 CALLING SEQUENCE:

    $md5 = get_md5( $file );

=head2 INPUTS:

    $file       scalar; file name
                If no directory is specified, the file is
                assumed to be located in the working directory

=head2 OUTPUTS:

    $md5        scalar; the md5 checksum

=head2 CALLS:

    get_stored_md5, compute_md5

=head2 PROCEDURE:

First the stored checksum is looked up with get_stored_md5 (file
md5.md5 with lines in the form "<$file> <$md5>", or else file
$file.md5 containing a single line with the checksum).
If no stored checksum is found, the checksum is computed from the
file itself with compute_md5.

=head2 MODIFICATION HISTORY:

    APR-2010, Paul Hick (UCSD/CAIDA; pphick@caida.org)

=cut

sub get_md5 {
    my ($file) = @_;

    my $md5 = get_stored_md5( $file );
    #$md5 = get_whale_md5 ( $file ) unless $md5;    bad idea??
    $md5 = compute_md5( $file ) unless $md5;

    return $md5;
}

=head1 FUNCTION: get_stored_md5

=head2 PURPOSE:

Get the md5 checksum for a file as stored alongside the file
in the same directory.

=head2 CALLING SEQUENCE:

    $md5 = get_stored_md5( $file );

=head2 INPUTS:

    $file       scalar; file name
                If no directory is specified, the file is
                assumed to be located in the working directory

=head2 OUTPUTS:

    $md5        scalar; the md5 checksum
                undef if no checksum was determined

=head2 PROCEDURE:

First check for file md5.md5. If present this contains
lines in the form "<$file> <$md5>".
If md5.md5 is not readable, try file $file.md5. This contains a
single line with the checksum.
If not found then return undef.
Dies if md5.md5 contains more than one entry for the file.

=head2 MODIFICATION HISTORY:

    APR-2010, Paul Hick (UCSD/CAIDA; pphick@caida.org)

=cut

sub get_stored_md5 {
    my ($path) = @_;

    my $file    = basename $path;
    my $md5file = File::Spec->catfile( dirname($path), 'md5.md5' );
    my $line;

    if ( -r $md5file && open( my $fh, '<', $md5file ) ) {
        # Found md5.md5 file in same directory as input file. Should contain
        # checksums for all files in the directory.
        my @lines = <$fh>;
        close $fh;

        # The file name is matched literally (\Q..\E): characters such as
        # '.', '+' or '(' in a name must not act as regex metacharacters.
        chomp( @lines = grep { /^\Q$file\E\s/ } @lines );
        list_multiple_checksums_and_die( $file, $md5file, @lines ) if @lines > 1;

        # The checksum is the first field following the file name.
        ($line) = $lines[0] =~ /^\Q$file\E\s+(\S+)/ if @lines == 1;
    }
    elsif ( -r "$path.md5" && open( my $fh, '<', "$path.md5" ) ) {
        # Found file with same name as input file with extension .md5 appended.
        # This file should contain a single line with the checksum in it.
        $line = <$fh>;
        close $fh;
        chomp $line if defined $line;    # undef for an empty file
    }

    return extract_md5( $line, $file );
}

=head1 FUNCTION: set_stored_md5

=head2 PURPOSE:

Add the specified md5 checksum to the file md5.md5
(located in the same directory as the file itself).

=head2 CALLING SEQUENCE:

    set_stored_md5( $md5, $file, $new_md5dotmd5, $verbose );

=head2 INPUTS:

    $md5            md5 checksum of $file
    $file           scalar; file name
                    If no directory is specified, the file is
                    assumed to be located in the working directory
    $new_md5dotmd5  if set then create a new md5.md5 if it does not exist
    $verbose        if set, print a message after adding the checksum

=head2 OUTPUTS:

    (none)

=head2 PROCEDURE:

If $new_md5dotmd5 is false then file md5.md5 must exist and be writeable.
If $new_md5dotmd5 is true an attempt is made to create md5.md5 if it
does not exist already.

The process exits with status 1 if md5.md5 exists but is not writeable,
and with status 2 if md5.md5 does not exist and $new_md5dotmd5 is not set.

=head2 MODIFICATION HISTORY:

    JUN-2010, Paul Hick (UCSD/CAIDA; pphick@caida.org)

=cut

sub set_stored_md5 {
    my ($md5, $path, $new_md5dotmd5, $verbose) = @_;

    my $file    = basename $path;
    my $md5file = File::Spec->catfile( dirname($path), 'md5.md5' );

    # Check whether md5.md5 exists and is writeable.
    my $fh;
    unless ( -w $md5file && open( $fh, '>>', $md5file ) ) {
        if ( -e $md5file ) {
            print STDERR "'$md5file' exists, but is not writeable\n";
            exit 1;
        }
        unless ( $new_md5dotmd5 ) {
            print STDERR "'$md5file' does not exist; use -c option to create one\n";
            exit 2;
        }
        open( $fh, '>', $md5file ) or die "failed to create new '$md5file'\n";
        print " created new '$md5file'\n";
    }

    # Writeable md5.md5 file in same directory as input file. Should contain
    # checksums for all files in the directory.
    print {$fh} "$file $md5\n";

    # Buffered write errors only surface when the handle is closed.
    close $fh or die "failed to write '$md5file': $!\n";

    print " added checksum for '$file' to '$md5file'\n" if $verbose;
    return;
}

=head1 FUNCTION: remove_stored_md5

=head2 PURPOSE:

Removes a checksum entry from an md5.md5 file

=head2 CALLING SEQUENCE:

    remove_stored_md5( $file, $verbose );

=head2 INPUTS:

    $file       scalar; file name
                If no directory is specified, the file is
                assumed to be located in the working directory
    $verbose    if set, print a message describing what was done

=head2 PROCEDURE:

The md5.md5 file in the directory of $file is read. If it contains
exactly one entry for the file, md5.md5 is renamed to md5.md5.backup,
rewritten without that entry, and the backup is deleted.
Dies if more than one entry for the file is present.
If md5.md5 is not readable a message is printed to STDERR and nothing
is changed.

=head2 MODIFICATION HISTORY:

    JUN-2011, Paul Hick (UCSD/CAIDA; pphick@caida.org)

=cut

sub remove_stored_md5 {
    my ($path, $verbose) = @_;

    my $file    = basename $path;
    my $md5file = File::Spec->catfile( dirname($path), 'md5.md5' );

    my $in;
    unless ( -r $md5file && open( $in, '<', $md5file ) ) {
        print STDERR "'$md5file' not readable; no changes made\n";
        return;
    }

    # Found md5.md5 file in same directory as input file. Should contain
    # checksums for all files in the directory.
    my @lines = <$in>;
    close $in;

    # Should match zero or one (and only one) line. The name is matched
    # literally so that regex metacharacters in it have no effect.
    my @matches = grep { /^\Q$file\E\s/ } @lines;
    list_multiple_checksums_and_die( $file, $md5file, @matches ) if @matches > 1;

    unless ( @matches == 1 ) {
        print " no checksum for '$file' found\n" if $verbose;
        return;
    }

    # Keep the old content as a backup until the new file is complete.
    my $backup = $md5file . ".backup";
    rename( $md5file, $backup )
        or die "failed to rename '$md5file' to '$backup': $!\n";

    my @kept = grep { $_ !~ /^\Q$file\E\s/ } @lines;

    open( my $out, '>', $md5file ) or die "failed to open '$md5file' for writing\n";
    print {$out} @kept;
    close $out or die "failed to write '$md5file': $!\n";

    print " nuked checksum for '$file' in '$md5file'\n" if $verbose;
    unlink $backup;
    return;
}

=head1 FUNCTION: extract_md5

=head2 PURPOSE:

Extracts md5 checksum from string. This deals with the various
formats that utilities use to output the checksum.

=head2 CALLING SEQUENCE:

    $md5 = extract_md5( $line, $file );

=head2 INPUTS:

    $line       scalar: string containing checksum
    $file       scalar; file name (without directory)

=head2 OUTPUTS:

    $md5        scalar; the md5 checksum
                undef if no checksum was determined

=head2 PROCEDURE:

Recognized formats (checksum is 32 lowercase hex digits):

    <md5>                   redirect (md5 < $path) or pipe (cat $path | md5)
    <md5>  -                redirect or pipe from md5sum
    MD5 (<file>) = <md5>    output from md5 and openssl md5
    <md5>  <file>           output from md5sum

The file name is matched literally.

=head2 MODIFICATION HISTORY:

    APR-2010, Paul Hick (UCSD/CAIDA; pphick@caida.org)

=cut

sub extract_md5 {
    my ($line, $file) = @_;

    # 'return undef' (rather than a bare return) is deliberate in this
    # function: callers always get exactly one value, as before.
    return undef unless $line;

    my $re = qr/[0-9a-f]{32}/;

    # Note that we assume that the md5.md5 or $path.md5 file only contains
    # the file name (not a full path).
    # Escape the name so characters like '.', '+', '(' match themselves.
    my $name = defined $file ? quotemeta($file) : '';

    my @formats = (
        qr/^($re)$/,                            # Redirect (md5 < $path) or pipe (cat $path | md5)
        qr/^($re) [ *]-$/,                      # Redirect or pipe from md5sum
        qr/^MD5\s*\($name\)\s*=\s*($re)$/,      # Output from md5 and openssl md5
        qr/^($re) [ *]$name$/,                  # Output from md5sum
    );

    for my $format (@formats) {
        return $1 if $line =~ $format;
    }

    return undef;
}

# Print all md5.md5 entries found for a file, then die: more than one
# entry for the same file makes the stored checksum ambiguous.
#   $file     file name the entries belong to
#   $md5file  name of the md5.md5 file the entries were read from
#   @lines    the matching entries
sub list_multiple_checksums_and_die {
    my ($file, $md5file, @lines) = @_;

    print "entries for '$file' in '$md5file':\n";
    print " $_\n" for @lines;

    die "multiple checksum entries in '$md5file' for '$file'\n";
}

# Build the (multi-line) message reported for a failed checksum test.
#   $file     file name that was checked
#   $md5_new  checksum that was computed or substituted
#   $md5_old  checksum that was stored alongside the file
#   $type     'computed' or 'substitute', describing $md5_new
sub failed_checksum_message {
    my ($file, $md5_new, $md5_old, $type ) = @_;

    return join '',
        "'$file' failed checksum test\n",
        " $md5_new is $type checksum\n",
        " $md5_old is recorded checksum\n";
}

1;