#!/usr/bin/python """ #+ # NAME: # burncheck # PURPOSE: # Create and check checksums of L1A files # CATEGORY: # gen/python # CALLING SEQUENCE: # burncheck -gz source suspects destination # burncheck -rm source suspects # # -gz : files from suspects directory are compared to source, # if they pass they are gzipped # : and moved to destination. # -rm : files from suspects directory are compared to source, # if they pass they are deleted # : from the source directory # # source : a directory where 'clean' files are located, and # to be checked against # destination : a directory where 'clean' files are moved after # checksum is confirmed # suspects : the directory containing unchecked files, that need # to be compared to source # # INPUTS: # # # OPTIONAL INPUTS: # -rm # -gz """ import sys,os,glob from shutil import move, copy2 from subprocess import call from tiny import hide_env, args, start, is_there, count_instances, tomb from hashlib import sha256 from burnfiles import burnfiles_move def burncheck_check(source,suspects,destination=None): # generate local checksum file = os.path.basename(source) directory = os.path.dirname(source) checksumfile = os.path.join(directory,'SHA256SUM') if destination == None: checksumfile += '-compressed' try: print "Generating checksum for",file,":", checksum = sha256(open(suspects,'rb').read()).hexdigest() except IOError: print "IOError Cannot read file",file return 1 print checksum,"OK" print "Checking checksum against file",os.path.basename(checksumfile),":", filepass = 0 checkval = readkey(checksumfile, file) if checkval == checksum: print "OK" elif checkval == 0: return 1 else: print checkval, 'FAILED' return 1 if destination == None: #gzipped file, to be deleted by caller after all files pass, else leave all files return 0 else: print "Moving",file,"to",destination,':', safe_move(source,os.path.join(destination,file)) if os.path.exists(os.path.join(destination, file)): print 'OK' return 0 # copy suceeded so add it's checksum to the SHA256SUM-compressed else: print 'FAILED' return 1 def addkey(checksumfile, checksum, filename): """ Adds provided checksum to provided checksumfile with the corresponding filename. """ checkdict = {} try: FILE = open(checksumfile, 'r') for line in FILE: checkval = line.split(' ',1)[0].strip(' ') checkfile = line.split(' ',1)[1].strip(' \n') checkdict[checkval] = checkfile FILE.close() if filename in checkdict.values(): print 'WARNING: This file', filename,'already has a key present. Make sure this is what you want to do.' print ' : Please remove the old checksum by hand.' if checksum in checkdict.keys(): if checkdict[checksum] != filename: print 'ERROR: Checksum mismatch. Checksum present but references different file.' return 1 else: print 'WARNING: Checksum for file',filename,'already present in',checksumfile,'- Continuing, but you should wonder why this happened.' return 0 except IOError: print 'No such file, ',checksumfile,'Creating it.' # finally: # most systems in here are on python 2.4 which aparently doesn't include finally: FILE=open(checksumfile,'a') FILE.writelines([checksum,' ',filename,'\n']) FILE.close() print 'OK' return 0 def readkey(checksumfile, filename): """ Opens checksumfile and looks for a key matching filename. Returns string containing checksum or 0 if no checksumfile or no checksum matching filename is found. """ # this all goes down hill if there are multiple entries for a single file, only the last one will be retained # in the key/value pair, that's life, and there shouldn't be multiples anyways, if there are, fix them by hand. checkdict = {} try: FILE = open(checksumfile, 'r') for line in FILE: checkval = line.split(' ',1)[0].strip(' ') checkfile = line.split(' ',1)[1].strip(' \n') checkdict[checkfile] = checkval FILE.close() if filename in checkdict.keys(): return checkdict[filename] else: print 'No checksum found for file: ',filename return 0 except IOError: print 'No checksum file',checksumfile,'found.' return 0 def checkallkeys(checksumfile): """ Parses the contents of the checksum file. A key for each file in the directory containing the checksum file is queried. If a key is not found, one is generated and added to the checksum file. Intended to be used on L1A_DVD directory only (*.buf) files. Hopefully the compressed buffers will properly generate checksums! """ if not os.path.exists(checksumfile): print 'File not found: %s' % checksumfile sys.exit(1) pathname = os.path.dirname(checksumfile) iscompressed = (checksumfile[-10:] == 'compressed') extragz = '' if iscompressed: extragz = '.gz' files = glob.glob(os.path.join(pathname, '*.buf')) files.sort() for file in files: print 'Found buf file in compressed directory. Compressing...' status = call(['gzip', '-f',file]) checksum = sha256(open(file+'.gz').read()).hexdigest() addkey(checksumfile, checksum, os.path.basename(file)+'.gz' ) print 'Key added to',checksumfile txtfile = glob.glob(os.path.join(pathname,'l1a_dvd.txt')) for file in txtfile: status = call(['gzip', '-f',file]) files = glob.glob(os.path.join(pathname, '*.buf'+extragz)) files.sort() for file in files: status = readkey(checksumfile, os.path.basename(file)) if status == 0: print 'Generating checksum for file:',file, checksum = sha256(open(file).read()).hexdigest() addkey(checksumfile, checksum, os.path.basename(file)) print 'Key added to',checksumfile def safe_move(source, destination): try: #standard attempt to rename if fail, use external mv. os.rename(source, destination) except OSError: #caught the exception, so just move it regularly #might be good to modify this to run call_external and catch a mv exception then bail #as it stands, this assumes success and moves on. print 'IO Error: Attempt to rename across device, performing manual move' # call(["mv", os.path.join(source,file), os.path.join(destination, file)]) move(source,destination) return 0 if __name__ == '__main__': arg = args( sys.argv) narg = len(arg) compress = is_there( '-gz' , sys.argv) delete = is_there( '-rm' , sys.argv) if narg > 1: source = arg[1] if source == '.': source = os.getcwd() if narg > 2: suspects = arg[2] destination = None if suspects == '.': suspects = os.getcwd() if suspects == source: print "Something is wrong, you are comparing %s against itself. Do not use me for that."%source sys.exit(1) if narg > 3: destination = arg[3] if destination == '.': destination = os.getcwd() if delete: print "Do not specify a destination if you are deleting files (-rm). If you are not deleting files" print "then try running this again without the -rm argument." sys.exit(1) # this whole else statement bothers me. i should try something... else else: source = raw_input('Directory: ') source = os.path.expandvars( source ) if not os.path.isdir(source): print 'source is not a directory: ', source sys.exit(1) if not os.path.isdir(suspects): print 'suspects is not a directory: ', suspects sys.exit(1) suspects_file_list = glob.glob(os.path.join(suspects, '*')) suspects_file_list.sort() source_file_list = glob.glob(os.path.join(source,'*')) source_file_list.sort() source_file_list_copy = list(source_file_list) #python pitfall : source_file_list_copy = source_file_list makes a reference, not a copy, so modifying source_file_list_copy #will directly modify source_file_list, this is unless you create source_file_list_copy with the list() function! bonkers. status = 0 allstatus = 0 print "Source: ",source print "Suspects: ",suspects if destination != None: print "Destination: ",destination if compress: for file in source_file_list: file = os.path.basename(file) # check to see if there is a matching suspect file that needs to be checked suspectfile = os.path.join(suspects, file) if suspectfile in suspects_file_list: if (file != 'SHA256SUM' and file != 'l1a_dvd.txt'): status = burncheck_check(os.path.join(source,file),suspectfile, destination) if status == 0: #on the fly modification of source_file_list really tosses the #for file in iteration. the loop retains an iteration value, if you #remove entry 0, and the next entry is 1, the new list has 0->1 and 1->2 so #it will skip the old 1 and move to the new 1... source_file_list_copy.remove(os.path.join(source,file)) # adds up all the return codes, if there is a problem, well... do nothing, this is really # something that will be used with the gziped files. allstatus += status else: print "%s: no match."%file recheck_file_list = glob.glob(os.path.join(source,'*')) if len(source_file_list_copy) == len(recheck_file_list) == 2: #move SHA256SUM # print "should move SHA256SUM" print 'Moving SHA256SUM to',destination,':', safe_move(os.path.join(source,'SHA256SUM'),os.path.join(destination,'SHA256SUM')) print 'OK' #move l1a_dvd.txt to 4_dvdgz, expect l1adaemon to gzip it. # print 'Moving l1a_dvd.txt to',os.path.join(destination,'l1a_dvd.txt.gz'),':', # status = call(["gzip", os.path.join(source,'l1a_dvd.txt')]) safe_move(os.path.join(source,'l1a_dvd.txt'), os.path.join(destination, 'l1a_dvd.txt')) print 'OK' # print source_file_list else: if os.path.exists(os.path.join(source, 'SHA256SUM')): print 'Copying SHA256SUM to',destination,':', copy2(os.path.join(source, 'SHA256SUM'),os.path.join(destination,'SHA256SUM')) print 'OK' else: print 'File not found: %s.' % os.path.join(source, 'SHA256SUM') if os.path.exists(os.path.join(source, 'l1a_dvd.txt')): print 'Copying l1a_dvd.txt to',destination,':', copy2(os.path.join(source,'l1a_dvd.txt'), os.path.join(destination, 'l1a_dvd.txt')) print 'OK' else: print 'File not found: %s.' % os.path.join(source, 'l1a_dvd.txt') if delete: delete_file_list = list() problem_file_list = list() for file in source_file_list: file = os.path.basename(file) suspectfile = os.path.join(suspects,file) if suspectfile in suspects_file_list: if file[:len('SHA256SUM')] != 'SHA256SUM' and file[:len('l1a_dvd')] != 'l1a_dvd': status = burncheck_check(os.path.join(source,file),suspectfile, 0) if status == 0: #keeps track of files remaining in directory source #this way the SHA256SUM/compressed are not deleted if there #are other gzipped files still there (another dvdgz is pending check) source_file_list_copy.remove(os.path.join(source,file)) #keeps track of files to be deleted if everything is ok delete_file_list.append(os.path.join(source,file)) else: problem_file_list.append(os.path.join(source,file)) print "Bad status, something went wrong, we'll bail later" else: # remove them from the copy pending deletion # source_file_list_copy.remove(os.path.join(source,file)) delete_file_list.append(os.path.join(source,file)) allstatus = allstatus + status else: print "%s: no match."%file if allstatus == 0: for file in delete_file_list: file = os.path.basename(file) if file[:len('SHA256SUM')] != 'SHA256SUM' and file[:len('l1a_dvd')] != 'l1a_dvd': print "Deleting file:",file, os.remove(os.path.join(source,file)) print "OK" else: recheck_file_list = glob.glob(os.path.join(source,'*')) if len(source_file_list_copy) == len(recheck_file_list) == 4: os.remove(os.path.join(source,file)) else: print "Leaving file: ",file else: print "Everything wasn't OK. gzipped files not removed" print "List of problem files: ", problem_file_list # so everything should be ok, time to delete the files sys.exit()