#!/usr/bin/python2
#+
# NAME:
#   mk_archive
# PURPOSE:
#   Burn sunspot archive on CD
# CALLING SEQUENCE:
#   mk_archive root=<root>            create archive mirror in <root>
#   mk_archive -tar root=<root>       mirror, and create archive tar ball
#   mk_archive -cd root=<root>        mirror, and create CD
#   mk_archive -tar -cd root=<root>   mirror, create CD from tar ball
# OPTIONAL INPUT PARAMETERS:
#   root=<root>     root directory where mirror of sunspot archive
#                   is maintained (default: $HOME/www)
#   -tar            after mirror is completed, make tar ball of entire archive
#   -cd             burn cd. If -tar is NOT set then the whole archive tree is
#                   burned on one or more CDs. If -tar is set then the
#                   tar ball is burned on a CD.
#   user=<user>     user and group specification used in mirror package file
#   group=<group>   (defaults: user=$USER, group=users)
# CALLS:
#   tiny
# RESTRICTIONS:
#   The Perl script mirror from SSW is used. It is located with the command:
#       mirror = os.path.join( os.environ['SSW' ], 'gen', 'mirror','mirror')
#   i.e. SSW must be installed and env variable SSW must be defined.
#
#   The tiny module is located in $PYTHONPATH and is located by:
#       sys.path.append( os.path.join(os.environ['smei'],'gen','python') )
#   i.e. the SMEI software tree must be installed and env var smei must be defined.
#
#   Burning the CD requires the programs mkisofs and cdrecord.
#   The command to burn the CD is
#       cdrecord -eject -data speed=12 fs=4m dev=0,0 cd_image_file
#   I.e. a 12-speed writer on scsi bus 0,0 is assumed.
# PROCEDURE:
#   A mirror of the sunspot archive on cassfos02 is created locally.
#   The local archive is used to create the CD, either by first creating
#   a tarball of the archive and burning it on a CD, or by directly
#   burning the directory structure on CD (this may take
#   more than one CD).
#
#   The package file for the Perl script mirror is created on the fly:
#       sunspot.package     the package file used by mirror
#                           contains mirror information for three directories
#                           on cassfos02, which are mirrored locally to three
#                           subdirectories in $HOME/www:
#
#       /www2/sunspot/public_domain/  --> $HOME/www/public_domain
#       /www4/archive/                --> $HOME/www/archive
#       /www4/sunspot/html/images/    --> $HOME/www/images
#
#   A couple of auxiliary files should be located in the same directory as
#   this script:
#       bad_links.lst   contains information about what to do with bad links
#                       This is needed to change the symlinks in the sunspot
#                       archive (which are only valid on cassfos02) to valid
#                       symlinks on the CD.
#                       Contains lines like /www2/sunspot/=../
#                       Whenever a bad symlink is found pointing to /www2/sunspot
#                       this prefix is replaced by ../. This is then used to define a
#                       new symlink after removing the old one.
#
#       packages.lst    (optional) list of files and directories to be left
#                       out of the CD images. (NOT TESTED YET)
#                       Contains groups of lines separated by the delimiter 'exclude:', e.g
#                           exclude:
#                           images/evol
#                           exclude:
#                           images/letters
#                           images/cass
#                       Each group results in a CD of the archive with the specified
#                       files and/or directories omitted.
# MODIFICATION HISTORY:
#   SEP-2002, Paul Hick (UCSD/CASS; pphick@ucsd.edu)
#-

import os
import string
import sys

import tiny


# Remove archive/name and replace it with a valid link to good_value
# Used to replace bad symlinks by good symlinks.
#   archive   : top directory of the local sunspot archive
#   name      : path of the bad symlink, relative to 'archive'
#   good_value: new link target; it is stored in the symlink as is
# A message describing the new link is printed. Nothing is returned.
def fix_bad_link(archive, name, good_value):
    full_name = os.path.join(archive, name)

    # lexists() is also true for a dangling symlink (exists() would follow
    # the link and report False), so the old link is removed if it is there
    # and a link that is already gone does not abort the script.
    if os.path.lexists(full_name):
        os.remove(full_name)

    os.symlink(good_value, full_name)

    # Single-string form prints the same text under Python 2 and Python 3
    print(' '.join(['symlink: ', good_value, ' --> ', full_name]))
    return


# Get a list of replacement directories required to fix bad links.
# File badfile contains lines like /www2/sunspot/=../. Whenever
# a bad symlink is found pointing to /www2/sunspot this prefix is
# replaced by ../.
# This is then used to define a new symlink after
# removing the old one.
# Returns a list of [bad_prefix, replacement] pairs; the list is empty
# if badfile does not exist. Only the first '=' on a line separates the
# two parts. Empty lines and lines without '=' are skipped.
def get_replacements(badfile):
    dirs = []
    if os.path.exists(badfile):
        with open(badfile, 'r') as iu:
            rep_dirs = iu.read().split('\n')
        for rep_dir in rep_dirs:
            if '=' in rep_dir:
                dirs.append(rep_dir.split('=', 1))
    return dirs


# links is the output from the mirror command used to mirror the
# archive on cassfos02 on the local machine. It may contain a list of
# bad symlinks. These are extracted and returned in bad_links
# Each returned entry is the remainder of a line that starts with the
# marker string below (the code in fix_symlinks expects 'name -> target').
def get_bad_links(links):
    # Spelling must match the text searched for in the mirror output
    sym_string = 'symlink to non-existant file: '
    sym_length = len(sym_string)
    return [line[sym_length:]
            for line in links.split('\n')
            if line.startswith(sym_string)]


# This does the actual dirty work required to fix bad links.
#   archive: top directory of the local sunspot archive
#   links  : output from the mirror command used to create the local
#            copy of the sunspot archive
#   badfile: file containing the information on what to do with
#            bad symlinks mentioned in 'links'
def fix_symlinks(archive, badfile, links):
    rep_dirs = get_replacements(badfile)
    bad_lnks = get_bad_links(links)

    if rep_dirs:
        for bad_lnk in bad_lnks:
            lnk = bad_lnk.split(' -> ')
            if len(lnk) < 2:
                # Not of the form 'name -> target': nothing to repair
                continue
            name = lnk[0]
            bad_value = lnk[1]

            # Every matching prefix is applied in file order, so if several
            # prefixes match, the one listed last in badfile wins.
            for bad_prefix, replacement in rep_dirs:
                if bad_value.startswith(bad_prefix):
                    good_value = replacement + bad_value[len(bad_prefix):]
                    fix_bad_link(archive, name, good_value)

    # Remove remaining bad symlinks
    # NOTE(review): tiny.run_cmd is used here as if it returns the command
    # output as a string; the meaning of its second argument is not visible
    # in this file.
    lnks = tiny.run_cmd('find ' + archive + ' -name \\* -type l', 0)
    for lnk in lnks.split('\n'):
        # isdir/isfile follow the link, so both fail for a dangling symlink
        if lnk and not os.path.isdir(lnk) and not os.path.isfile(lnk):
            print('removing bad lnk ' + lnk)
            os.remove(lnk)

    return


# Reduces the size of the archive a bit by removing stuff.
#   archive : top directory of the local sunspot archive
#   exclfile: list of files and directories to be excluded.
#             The names listed in the file are paths relative
#             to the root 'archive'
# Returns a list of strings. Each string holds the ' -x <path>' options
# for one 'exclude:' group in exclfile (the caller inserts it in the
# mkisofs command). If exclfile does not exist or lists nothing, the
# result is [""], i.e. one image with nothing excluded.
def get_packages(archive, exclfile):
    xlists = []

    if os.path.exists(exclfile):
        with open(exclfile, 'r') as iu:
            excls = iu.read().split('\n')

        pkg_delimiter = "exclude:"
        xlist = ""
        for excl in excls:
            # Tolerate trailing blanks or carriage returns in the list file
            excl = excl.strip()
            if excl == pkg_delimiter:
                # Delimiter closes the previous group (if it had entries)
                if xlist != "":
                    xlists.append(xlist)
                xlist = ""
            elif excl:
                xlist = xlist + ' -x ' + os.path.join(archive, excl)

        # Last group is not followed by a delimiter
        if xlist != "":
            xlists.append(xlist)

    if not xlists:
        xlists.append("")

    return xlists


# === THE MAIN PROGRAM ====

# Command line arguments (see the header at the top of the file):
#   user=, group=   written into the mirror package file
#   root=           local directory where the mirror is maintained
#   -tar, -cd       select tarball creation and CD burning
# Burning the directory tree directly (-cd without -tar) is supposed to
# create a couple of CDs containing a partial archive (small enough to fit
# the CD). THIS HAS NOT BEEN TESTED YET.
# NOTE(review): tiny.start is used as if it returns '' when the keyword is
# absent from the argument list; tiny.is_there as if it returns a flag.

user = tiny.start('user=', sys.argv)
if user == '':
    user = os.environ['USER']

group = tiny.start('group=', sys.argv)
if group == '':
    group = 'users'

sunspot = tiny.start('root=', sys.argv)
if sunspot == '':
    sunspot = os.path.join(os.environ['HOME'], 'www')

make_tar = tiny.is_there('-tar', sys.argv)
burn_cd = tiny.is_there('-cd', sys.argv)

# here    : directory where this script is located (from sys.argv[0])
#           (used to locate file with the info to fix bad sym links)
# sunspot : local directory where sunspot archive is located
# archive : subdirectory of 'sunspot' where the html files are located
# mirror  : path to the Perl script mirror
# cd_img  : name of CD image in some temp directory (will be deleted)

here = (os.path.split(sys.argv[0]))[0]
badfile = os.path.join(here, 'bad_links.lst')
exclfile = os.path.join(here, 'packages.lst')

# TEMP is usually not defined in a Unix environment, so fall back on
# TMPDIR and finally /tmp instead of failing with a KeyError.
temp = os.environ.get('TEMP') or os.environ.get('TMPDIR') or '/tmp'

archive = os.path.join(sunspot, 'archive')
package = os.path.join(temp, 'sunspot.package')
cd_img = os.path.join(temp, 'sunspot.img')
tarball = os.path.join(temp, 'sunspot.tar.gz')
update_log = os.path.join(temp, 'sunspot.log')

# SSW must be defined (see RESTRICTIONS in the header); KeyError otherwise
mirror = os.path.join(os.environ['SSW'], 'gen', 'mirror', 'mirror')

# Single-string form prints the same text under Python 2 and Python 3
print('Local destination :  ' + sunspot)
# Single-string form prints the same text under Python 2 and Python 3
print('Local archive subdir:  ' + archive)
if burn_cd:
    print('Name of CD image :  ' + cd_img)
if make_tar:
    print('Name of tarball :  ' + tarball)

# Before running the mirror program remove all symlinks
# from the local sunspot mirror. Mirror apparently does not
# download symlinks that already exist, even if they are different.

tiny.run_cmd('find ' + sunspot + ' -name \\* -type l -exec rm -vf {} \\;', 1)


# Return the lines of one package entry for the mirror package file.
#   name      : subdirectory of local_root receiving the files; also used
#               in the package name (sunspot_<name>)
#   remote_dir: directory on cassfos02 (no trailing slash)
#   local_root: local directory where the mirror is maintained
#   user,group: written into the 'user=' and 'group=' lines
#   update_log: written into the 'update_log=' line
def _package_lines(name, remote_dir, local_root, user, group, update_log):
    return [
        '#',
        '# Package for directory ' + remote_dir,
        'package=sunspot_' + name,
        'site=cassfos02.ucsd.edu',
        'remote_user=sunspot',
        # NOTE(review): plain-text password kept in the source; consider
        # reading it from a protected file or the environment instead.
        'remote_password=lightfoot',
        'remote_dir=' + remote_dir + '/',
        'local_dir=' + local_root + '/' + name + '/',
        'recurse_hard=true',
        'make_bad_symlinks=true',
        'mode_copy=true',
        'passive_ftp=false',
        'use_timelocal=false',
        'user=' + user,
        'group=' + group,
        'max_delete_files=99%',
        'max_delete_dirs=99%',
        'update_log=' + update_log,
    ]


# The three remote directories, in the order they appear in the package file
lines = []
for pkg_name, pkg_remote in (
        ('public_domain', '/www2/sunspot/public_domain'),
        ('archive', '/www4/archive'),
        ('images', '/www4/sunspot/html/images')):
    lines.extend(_package_lines(pkg_name, pkg_remote, sunspot,
                                user, group, update_log))

lines = '\n'.join(lines) + '\n'

# The package file contains the remote password, so create it readable
# by the owner only (the mode applies when the file is newly created).
fd = os.open(package, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
with os.fdopen(fd, 'w') as iu:
    iu.write(lines)

# Run the mirror program to update the sunspot archive
# The output is kept in 'log'; it is scanned later for bad symlinks.

log = tiny.run_cmd(mirror + ' ' + package, 1)
# The package file is only needed while mirror runs
os.remove(package)

# Remove all .xvpics subdirectories

tiny.run_cmd('find ' + sunspot + ' -name .xvpics -type d -exec rm -rf {} \\;', 0)

# Deal with symlinks. The output from the mirror call
# (stored in 'log') contains information about bad links.

fix_symlinks(archive, badfile, log)

if make_tar:
    # Move to the sunspot archive directory and create a tarball.
    # This is done whenever -tar is given, also without -cd (as described
    # in the calling sequence in the header); the tarball is then left in
    # the temp directory.

    current_dir = os.getcwd()
    os.chdir(sunspot)
    try:
        tiny.run_cmd('tar -czvf ' + tarball + ' .', 1)
    finally:
        # Always return to the original working directory
        os.chdir(current_dir)

if burn_cd:

    if make_tar:
        # Write the tarball into a cd image; then remove the tarball

        tiny.run_cmd('mkisofs -RJ -o ' + cd_img + ' ' + tarball, 1)
        os.remove(tarball)

        # Write the CD

        tiny.run_cmd('cdrecord -eject -data speed=12 fs=4m dev=0,0 ' + cd_img, 1)
        os.remove(cd_img)

    else:
        # Write the cd image file for the archive
        # get_packages returns one string of mkisofs exclude options
        # for each image to be made.

        xlists = get_packages(sunspot, exclfile)

        for xlist in xlists:
            # Create cd image from sunspot archive
            # Currently the archive is too big to fit on single cd.
            # Probably need to set up two images excluding different
            # sets of directories

            tiny.run_cmd('mkisofs -RJ ' + xlist + ' -o ' + cd_img + ' ' + sunspot, 1)

            # Burning is still disabled for this (untested) branch, so each
            # pass overwrites the image made by the previous one and the
            # last image is left in the temp directory.
            #tiny.run_cmd( 'cdrecord -eject -data speed=12 fs=4m dev=0,0 '+cd_img, 1 )

            # Delete the image file
            #os.remove( cd_img )

print('done')