#!/usr/bin/python # MODULE: # removedstarsearch.py # DESCRIPTION: # removedstarsearch.py is a convenience program for examining the PNT # files of processed skymaps, extracting columnar data for particular # stars, and writing the data to .TXT files with filenames corresponding # to the stars. This provides a means of isolating star subtraction # data for particular stars within PNT files, formatting the data, and # collating the formatted data in new .TXT files for later analysis. The # .TXT files generated by this program can be used by programs such # as Grapher 4 for graphical data analysis. The parameters for this # process are provided by the user as command line options. # # The process of this script's execution is relatively straightforward: # # 1. Read in any command line options specified by the user. # A. Check for invalid command line options. # B. Modify the parameters of the PNT file processing routines # based on which options were specified. # 2. Display the parameters of the PNT file processing to the user. # 3. Read in the list of star names from the file containing star # names provided by the user. # 4. If a file containing data formatting commands was provided by # the user (via a command line option; see below), then open # the file, read in the commands, and instantiate the # corresponding command classes. # 5. For each of the subdirectories c1, c1m0, c2, c2m0, c3, and c3m0 # within the base directory (the default is $SMEISKY0/equ): # A. For each file "x", if x is a PNT file and the standard SMEI # date in x's filename is within the date range specified by # the user, then: # i. For each noncomment line "l" in x beginning with one of # the star names provided by the user (i.e., a star name # stored in the file read by step 3): # I. 
#                    If a formatting commands file was provided by the
#                    user (i.e., step 4 was fully executed), then parse l
#                    by whitespace and execute each command on the parsed
#                    data, overwriting l with the results; otherwise,
#                    keep the remainder of l (the text following the
#                    star name field) unmodified.
#                II. Store l in a hashtable, using the star name found at
#                    the beginning of l as the key.
#          ii. Close x.
#       B. For each star name "n" in the list of star names generated in
#          step 3:
#          i.   Create a .TXT file with n as the prefix in the
#               destination directory.
#          ii.  Write each line of data associated with n from the
#               hashtable to the new .TXT file.
#          iii. Close the new .TXT file.
#
#   Each .TXT file generated by this script has the filename format
#
#       STARNAMEcN[m0].txt
#
#   where STARNAME is the name of the star whose formatted data is
#   contained within the .TXT file and N is the camera number of the PNT
#   files from which the data was extracted.  If the data contained within
#   the .TXT file came from PNT files corresponding to engineering mode
#   skymaps, then "m0" is added to the .TXT file's filename before the
#   extension.
#
#   As mentioned in the above outline of this script's execution process,
#   users can specify "formatting files", or text files containing
#   instructions to removedstarsearch.py on which PNT file data should
#   be extracted and how the data should be formatted in the generated
#   .TXT files.  If a user does not specify a formatting file, then PNT
#   file lines are written to the new .TXT files without modification.
#   For information about how to write formatting files and what formatting
#   commands can be specified, provide the -formathelp command line option
#   when executing removedstarsearch.py.  For technical specifications on
#   the formatting commands' classes, see the documentation in
#   tablemanipulation.py.
# # removedstarsearch.py is executed via the command line as follows: # # % removedstarsearch.py [Options] starlistfile # # Executing removedstarsearch.py without command line options (however, # starslistfile is required; see its documentation below) results in # the default behavior, which is as follows: # # + Processed skymap files and their associated PNT files are # located within the subdirectories of $SMEISKY0/equ (the "base # directory"). # + Generated .TXT files are written to the user's current directory # (i.e., ./). # + No formatting is performed on the data within the processed # PNT files. # + All located PNT files are processed. # # The complete list of valid command line options and information on # the meaning of starlistfile follows: # # -basedir=root_directory # Specifies the path to the base directory containing the # subdirectories c1, c1m0, c2, c2m0, c3, and c3m0, which contain # the PNT files to be processed. # Default: $SMEISKY0/equ # # -destdir=dest_directory # Specifies the path to the directory where the generated .TXT # files containing select data from PNT files will be written. # The given path must exist and represent a write-enabled # directory. # Default: ./ # # -formatfile=format_file # Specifies the path to a file containing information about how # lines of data read from PNT files should be formatted before # their being written to .TXT files. Information about # how this file should be structured can be acquired by using the # -formathelp command line option. If this option is not present, # then lines from PNT files are written to .TXT files without # formatting. # Default: No format file is accessed. # # -formathelp # Displays information about how the formatting file (see # -formatfile above) should be written. This information # is displayed to the user on standard output. This option acts # much like the -help option described below. 
# # -help # Displays a helpful usage message to the user on standard output # and terminates. # # -skipbadformats # Instructs the program to skip processing PNT files that generate # formatting errors. The program terminates upon finding # a formatting error by default. This switch overrides the default # behavior to allow the search to continue. Note that lines in # a PNT file that are processed prior to the line that causes the # formatting error are retained and later written to .TXT files. # This option is not necessary if no formatting file is provided # by the user (see -formatfile above). # # starlistfile # Path of a text file containing star names, one star name per # line. Only PNT file lines beginning with one of these star names # are processed and later written to .TXT files. Star names are # case-insensitive. This is a required command line argument and # has no default. # # -startdate=start # Limits the PNT file processing by adding a lower SMEI date # boundary (inclusive). start must be a date formatted as a # standard SMEI date (YYYY_DOY_hhmmss). Only PNT files with SMEI # dates at or later than start will be processed. The default # behavior of removedstarsearch.py is to not impose a lower # date bound on PNT files. # # -stopdate=stop # Limits the PNT file processing by adding an upper SMEI date # boundary (inclusive). stop must be a date formatted as a # standard SMEI date (YYYY_DOY_hhmmss). Only PNT files with SMEI # dates at or earlier than stop will be processed. The default # behavior of removedstarsearch.py is to not impose an upper # date bound on PNT files. # # LANGUAGE: # Python 2.4.3 # REVISION HISTORY: # April 12, 2007 (Jordan T. 
#       Vaughan, jtvaugha@ucsd.edu)
#       + Finalized module code for v1.00
#       + Added documentation
#

###########
# Imports #
###########

import os                           # Imported for file system manipulation
import os.path                      # Ditto
import re                           # Imported for REX functions
from tablemanipulation import *     # Imported for command classes
import sys                          # Imported for command line access

#############
# Variables #
#############

# Default values for important variables, some of which can be specified by
# the user via the command line.

# The path of the directory containing subdirectories containing processed
# skymaps and their PNT files.
baseDirectory = ''

# The path of the directory where new formatted .TXT files are written.
destDirectory = ''

# The path of the file containing the names of the stars that
# removedstarsearch.py will use to look for particular star data.
starList = ''

# The path of the file containing formatting command instructions.
formatFile = ''

# The list containing the star names found in the file specified by starList.
starNames = []

# Hashtable mapping star name strings to lists of lines of data.  When data
# is extracted and formatted from a PNT file for a particular star, the data
# is added to the list associated with that star in this hashtable.
starEntries = {}

# Tuple of skymap subdirectories.
subdirectories = ('c1', 'c1m0', 'c2', 'c2m0', 'c3', 'c3m0')

# Tuple of camera numbers.  This is parallel to subdirectories and indicates
# which camera number a subdirectory corresponds to.
cameraNumbers = (1, 1, 2, 2, 3, 3)

# Tuple of strings containing PNT file prefixes for all three cameras.
fileNameHeaders = ('c1pnt_', 'c2pnt_', 'c3pnt_')

# Integer constant indicating the number of characters at the beginning of
# each noncomment line of PNT files to treat as a star name.  In other words,
# this is the maximum character size of a star name.
starNameFieldSize = 11

# List containing command class objects in the order that they are to be
# executed on lines of data from PNT files.  For information on command
# classes, see tablemanipulation.py.
formattingCommands = []

# String representing the header to be written at the beginning of each
# .TXT file generated by removedstarsearch.py.
formattingHeader = ''

# String representing the starting date of the PNT file processing in standard
# SMEI date format.
startDate = ''

# String representing the ending date of the PNT file processing in standard
# SMEI date format.
stopDate = ''

# String representing the REX form of the standard SMEI date format.
SMEIDateRE = '\\d\\d\\d\\d_\\d\\d\\d_\\d\\d\\d\\d\\d\\d'

# Boolean flag indicating whether or not removedstarsearch.py should terminate
# when a line of data cannot be formatted according to the commands in
# formattingCommands.  Setting this to True means that removedstarsearch.py
# continues to execute when a line cannot be formatted (the rest of the
# offending PNT file is skipped).  Setting this to False makes
# removedstarsearch.py terminate when a line cannot be formatted.
skipBadFormats = False

#############
# Functions #
#############

# FUNCTION:
#   usageMessage
# DESCRIPTION:
#   Prints a usage message to standard output.
# PARAMETERS:
#   None
# RETURN VALUE:
#   None
# EXCEPTIONS:
#   None
#
def usageMessage():
    # NOTE: No % formatting is applied to this string, so percent signs are
    # written singly (a doubled %% would be displayed literally).
    print("""
removedstarsearch.py, version 1.0 (build 4-12-2007)
by Jordan T. Vaughan (jtvaugha@ucsd.edu)

Examines PNT files of the REX form c(123)pnt_dddd_ddd_dddddd.txt and extracts
line entries for certain stars of the user's choice. The extracted lines are
written to new text files with filenames reflecting the stars' names. This
program is meant to be a convenience program for isolating and organizing
particular subtracted star data based on star names.

Usage:
    % removedstarsearch.py [Options] starlistfile

Options:
    -basedir=root_directory
        Specifies the path to the base directory containing the
        subdirectories c1, c1m0, c2, c2m0, c3, and c3m0, which contain
        the PNT files to be processed.
        Default: $SMEISKY0/equ

    -destdir=dest_directory
        Specifies the path to the directory where the generated .TXT
        files containing select data from PNT files will be written.
        The given path must exist and represent a write-enabled
        directory.
        Default: ./

    -formatfile=format_file
        Specifies the path to a file containing information about how
        lines of data read from PNT files should be formatted before
        their being written to .TXT files. Information about how this
        file should be structured can be acquired by using the
        -formathelp command line option. If this option is not present,
        then lines from PNT files are written to .TXT files without
        formatting.
        Default: No format file is accessed.

    -formathelp
        Displays information about how the formatting file (see
        -formatfile above) should be written. This information is
        displayed to the user on standard output. This option acts much
        like the -help option described below.

    -help
        Displays a helpful usage message to the user on standard output
        and terminates.

    -skipbadformats
        Instructs the program to skip processing PNT files that generate
        formatting errors. The program terminates upon finding a
        formatting error by default. This switch overrides the default
        behavior to allow the search to continue. Note that lines in a
        PNT file that are processed prior to the line that causes the
        formatting error are retained and later written to .TXT files.
        This option is not necessary if no formatting file is provided
        by the user (see -formatfile above).

    starlistfile
        Path of a text file containing star names, one star name per
        line. Only PNT file lines beginning with one of these star names
        are processed and later written to .TXT files. Star names are
        case-insensitive. This is a required command line argument and
        has no default.

    -startdate=start
        Limits the PNT file processing by adding a lower SMEI date
        boundary (inclusive). start must be a date formatted as a
        standard SMEI date (YYYY_DOY_hhmmss). Only PNT files with SMEI
        dates at or later than start will be processed. The default
        behavior of removedstarsearch.py is to not impose a lower date
        bound on PNT files.

    -stopdate=stop
        Limits the PNT file processing by adding an upper SMEI date
        boundary (inclusive). stop must be a date formatted as a
        standard SMEI date (YYYY_DOY_hhmmss). Only PNT files with SMEI
        dates at or earlier than stop will be processed. The default
        behavior of removedstarsearch.py is to not impose an upper date
        bound on PNT files.

Examples:
    % removedstarsearch.py starlist.txt
        Conducts a search for the stars contained within starlist.txt in
        the default search directory and outputs the resulting text files
        to the current user directory.

    % removedstarsearch.py -help
        Displays this help message to standard output.

    % removedstarsearch.py -basedir=./skymaps -destdir=./results starlist.txt
        Same as the first example, only the base search directory is
        changed to ./skymaps and the destination directory is set to
        ./results.\n""")


# FUNCTION:
#   formatHelpMessage
# DESCRIPTION:
#   Displays helpful information about the formatting file to the user on
#   standard output.  This is meant to be called when the -formathelp command
#   line option is specified.
# PARAMETERS:
#   None
# RETURN VALUE:
#   None
# EXCEPTIONS:
#   None
#
def formatHelpMessage():
    print("""
removedstarsearch.py, version 1.0 (build 4-12-2007)
by Jordan T. Vaughan (jtvaugha@ucsd.edu)

Formatting File Help:
    removedstarsearch.py grants users the option of formatting data from
    PNT file lines before writing them to their respective text files.
    This allows users to select which columns of data will be written to
    the .TXT files. In some special cases described below, data can be
    modified before being written to .TXT files.

Format of the Formatting File:
    Formatting files can have any extension. Lines beginning with a
    semicolon (;) or colon (:) are treated as comments and are ignored by
    removedstarsearch.py. Comment lines can appear anywhere in formatting
    files. Blank lines or lines containing only whitespace are also
    ignored by removedstarsearch.py. Each line that is not a comment or a
    blank line must have the following syntax:

        command[, optionlist]

    The structure of a command is described below:

        command
            This is the name of the command (which is NOT "command").
            Command names are case-sensitive. The list of all valid
            commands is given below.

        [, optionlist]
            A comma-separated list of data following the command. The
            size and content of the option list is determined by what
            command precedes it.

    Invalid or malformed command lines are flagged by
    removedstarsearch.py by displaying a warning to the user on standard
    output. Note that invalid or malformed command lines terminate
    removedstarsearch.py. If there are no valid commands in a formatting
    file passed to removedstarsearch.py, then no formatting is performed.

    Note that commands are executed by removedstarsearch.py in the order
    they are found in the formatting file. Make sure that the commands
    appear in the formatting file in the order you desire them to be
    executed.

List of Commands:
    extract, column_number, output_field_name, output_field_size,
    justification
        This command extracts a column of data from the line entries and
        outputs it to the final text files under the column title
        output_field_name and with a space-padded field of size
        output_field_size. column_number must be a positive integer or
        zero specifying which column of data should be extracted from the
        initial line entries (this index is zero-based).
        output_field_name is a string (no quotation marks needed, but no
        commas are allowed!) that will become the title of the column in
        the outputted text files. output_field_size specifies the width
        of the outputted field (in characters). Neither the outputted
        data nor the string represented by output_field_name should have
        lengths exceeding output_field_size. justification is a string
        specifying how the columnar data is to be justified in the
        outputted field. Acceptable values are l (for left
        justification), r (for right justification), and c (for centered
        data).

        Examples:
            extract, 0, Date, 15, l
            extract, 5, Time, 8, r
            extract, 3, RASlope, 8, c

    multiplyfloatextract, output_field_name, output_field_size,
    justification, precision, column1, ..., columnN
        This command is similar to extract, but it extracts multiple
        columns of floating point numerical data (it can extract integral
        data as well) and outputs the product of the values as a floating
        point number. The user can specify as many columns as he or she
        desires, but at least one column must be specified.
        output_field_name, output_field_size, and justification are
        identical in function to their equivalents in the extract command
        (see above). precision is an integer specifying how many decimal
        digits should appear after the decimal point in the output
        floating point number (the product). column1, ..., columnN are
        the zero-based indicies of the columns to extract (these are
        functionally identical to the column_number parameter of the
        extract command; see above).

        Example:
            multiplyfloatextract, Glare*I/Istd, 12, r, 5, 4, 8

    smei2days, column_number, output_field_name, output_field_size,
    justification, zero_year, zero_doy, zero_hour, zero_minute,
    zero_second, precision
        This command functions exactly as extract does (see above), but
        additionally treats the columnar data as dates in standard SMEI
        format (YYYY_DOY_HHMMSS) and translates them into days since some
        temporal origin (a "zero date"). column_number,
        output_field_name, output_field_size, and justification are
        functionally identical to their equivalents in extract. zero_year
        is an integer specifying the year of the zero date. zero_doy is
        the day of the year of the zero date. zero_hour, zero_minute, and
        zero_second form the time of the day of the year of the zero
        date. All of the zero date quantities must be natural numbers.
        precision is a positive integer specifying the number of digits
        that will appear after the decimal point. precision must be less
        than (output_field_size - 2) if the output is to be correctly
        formatted.

        Example:
            smei2days, 4, time (days), 10, l, 2003, 148, 1, 5, 5, 5

    sumfloatextract, output_field_name, output_field_size, justification,
    precision, column1, multiplier1, ..., columnN, multiplierN
        This command is similar to extract, but it extracts multiple
        columns of floating point numerical data (it can extract integral
        data as well) and outputs the sum of the values as a floating
        point number. Each extracted numerical value is multiplied by a
        constant multiplier before the summation is executed. The user
        can specify as many column-multiplier pairs as he or she desires,
        but at least one column- multiplier pair must be specified with
        this command. output_field_name, output_field_size, and
        justification are identical in function to their equivalents in
        the extract command (see above). precision is an integer
        specifying how many decimal digits should appear after the
        decimal point in the output floating point number (the sum).
        column1, ..., columnN are the zero-based indicies of the columns
        to extract (these are functionally identical to the column_number
        parameter of the extract command), while multiplier1, ...,
        multiplierN are the column indicies' respective multipliers. The
        multipliers can be integral or floating-point decimal numbers.
        Every column index must be followed by a multiplier.

        Examples:
            sumfloatextract, RA+m*dRA, 12, r, 4, 0, 1.0, 13, 10.0
            sumfloatextract, Scaled I/Istd, 15, c, 5, 5, 0.92

    space, size
        This command outputs size space characters to the text file.
        space commands are useful for adding spacing between entries on
        lines. Thus it is wise to place a space command between each pair
        of other commands (such as smei2days and extract) to separate
        their outputs. size must be a positive integer.

        Examples:
            space, 1
            space, 5

NOTE: Column indicies are zero-based and begin at the first column AFTER the
star names in PNT files. Thus a star name can never be accessed by these
commands.\n""")


# FUNCTION:
#   addStarEntry
# DESCRIPTION:
#   Adds a line of data for a given star to the hashmap starEntries.  A new
#   list is created for the star if it has no entries yet.
# PARAMETERS:
#   entryname
#       A string representing the name of a star.
#   entryvalue
#       A string representing the line of data associated with entryname.
# RETURN VALUE:
#   None
# EXCEPTIONS:
#   None
#
def addStarEntry(entryname, entryvalue):
    # starEntries is looked up at call time, so this always targets the
    # hashtable currently bound at module level (the main program rebinds it
    # for every subdirectory).
    starEntries.setdefault(entryname, []).append(entryvalue)


# FUNCTION:
#   mutateStarName
# DESCRIPTION:
#   Transforms a star name by replacing characters that would result in an
#   invalid file name with more tolerable characters.  The transformations
#   are as follows:
#
#       * is replaced with ^
#       Space characters and tabs are replaced with _
#
# PARAMETERS:
#   starName
#       A string representing the name of a star.
# RETURN VALUE:
#   A string containing the changed star name (if it was changed at all).
# EXCEPTIONS:
#   None
#
def mutateStarName(starName):
    return starName.replace('*', '^').replace(' ', '_').replace('\t', '_')


# FUNCTION:
#   _exitWithError
# DESCRIPTION:
#   Prints an error message (prefixed with "ERROR: ") to standard output and
#   terminates the program with a nonzero exit status so that calling shells
#   and scripts can detect the failure.
# PARAMETERS:
#   message
#       A string describing the error, without the "ERROR: " prefix.
# RETURN VALUE:
#   Does not return.
# EXCEPTIONS:
#   SystemExit
#
def _exitWithError(message):
    print('ERROR: ' + message)
    sys.exit(1)


# FUNCTION:
#   _requireDirectory
# DESCRIPTION:
#   Terminates the program with an error message unless the given path
#   exists and is a directory.
# PARAMETERS:
#   path
#       A string containing the path to check.
# RETURN VALUE:
#   None
# EXCEPTIONS:
#   SystemExit if the path does not exist or is not a directory.
#
def _requireDirectory(path):
    if not os.path.exists(path):
        _exitWithError('Directory ' + path + ' does not exist.')
    if not os.path.isdir(path):
        _exitWithError('Directory ' + path + ' is not a directory.')


# FUNCTION:
#   _requireFile
# DESCRIPTION:
#   Terminates the program with an error message unless the given path
#   exists and is a regular file.
# PARAMETERS:
#   path
#       A string containing the path to check.
#   description
#       A string naming the kind of file for use in error messages (for
#       example, "Formatting file").
# RETURN VALUE:
#   None
# EXCEPTIONS:
#   SystemExit if the path does not exist or is not a file.
#
def _requireFile(path, description):
    if not os.path.exists(path):
        _exitWithError(description + ' ' + path + ' does not exist.')
    if not os.path.isfile(path):
        _exitWithError(description + ' ' + path + ' is not a file.')


# FUNCTION:
#   _requireSMEIDate
# DESCRIPTION:
#   Terminates the program with an error message unless the given string is
#   exactly one date in standard SMEI format.  The whole string must match:
#   dates are later compared as strings against PNT file names, so extra
#   leading or trailing characters would corrupt the comparison.
# PARAMETERS:
#   date
#       The string to check.
#   optionName
#       The command line option the date was given for (for example,
#       "-startdate"), used in the error message.
# RETURN VALUE:
#   None
# EXCEPTIONS:
#   SystemExit if the string is not a standard SMEI date.
#
def _requireSMEIDate(date, optionName):
    if not re.match(SMEIDateRE + '\\Z', date):
        _exitWithError('Argument provided for ' + optionName + ' is not in ' +
                       'standard SMEI format.')


################
# Main program #
################

# Examine the command line arguments.  Only option names are matched
# case-insensitively; option values keep the case typed by the user because
# paths may be case-sensitive.
commandLineArguments = sys.argv[1:]
lastArgumentIndex = len(commandLineArguments) - 1
for argumentIndex, argument in enumerate(commandLineArguments):
    option = argument.lower()

    # Does the user want to display the usage message?
    if option == '-help':
        usageMessage()
        sys.exit()

    # Does the user want to change the root search directory?
    elif option.startswith('-basedir='):
        value = argument[len('-basedir='):]
        if baseDirectory:
            _exitWithError('Base search directory specified more than once.')
        if not value:
            _exitWithError('No base directory path specified.')
        _requireDirectory(value)
        baseDirectory = value

    # Does the user want to change the destination directory?
    elif option.startswith('-destdir='):
        value = argument[len('-destdir='):]
        if destDirectory:
            _exitWithError('Destination directory specified more than once.')
        if not value:
            _exitWithError('No destination directory path specified.')
        _requireDirectory(value)
        destDirectory = value

    # Does the user want to format the line entries?
    elif option.startswith('-formatfile='):
        value = argument[len('-formatfile='):]
        if formatFile:
            _exitWithError('Formatting file specified more than once.')
        if not value:
            _exitWithError('No formatting file specified.')
        _requireFile(value, 'Formatting file')
        formatFile = value

    # Does the user want to display formatting help?
    elif option == '-formathelp':
        formatHelpMessage()
        sys.exit()

    # Did the user want to specify a lower date bound?
    elif option.startswith('-startdate='):
        if startDate:
            _exitWithError('-startdate specified more than once.')
        startDate = argument[len('-startdate='):]
        if not startDate:
            _exitWithError('No starting SMEI date specified for -startdate.')
        _requireSMEIDate(startDate, '-startdate')

    # Did the user want to specify an upper date bound?
    elif option.startswith('-stopdate='):
        if stopDate:
            _exitWithError('-stopdate specified more than once.')
        stopDate = argument[len('-stopdate='):]
        if not stopDate:
            _exitWithError('No stopping SMEI date specified for -stopdate.')
        _requireSMEIDate(stopDate, '-stopdate')

    # Did the user want to skip format errors?
    elif option == '-skipbadformats':
        if skipBadFormats:
            _exitWithError('-skipbadformats specified more than once.')
        skipBadFormats = True

    # The command line option was something other than the switched
    # (optional) options.  It is the star list if (and only if) it is the
    # last argument; otherwise it is an illegal option.  The position comes
    # from the enumeration rather than from searching sys.argv, which would
    # pick the wrong position when an argument is repeated.
    else:
        if argumentIndex != lastArgumentIndex:
            _exitWithError('Unrecognized option -- ' + argument)
        _requireFile(argument, 'Star name list')
        starList = argument

# Check to see if all of the necessary arguments were specified by the user.
if not baseDirectory:
    baseDirectory = os.path.expandvars('$SMEISKY0/equ')
if not destDirectory:
    destDirectory = '.'
if not starList:
    _exitWithError('No star name list specified.')

# Expand and modify path values.
baseDirectory = os.path.normpath(os.path.expanduser(os.path.expandvars(
    baseDirectory)))
destDirectory = os.path.normpath(os.path.expanduser(os.path.expandvars(
    destDirectory)))
starList = os.path.normpath(os.path.expanduser(os.path.expandvars(starList)))

# Output information to the user to inform him what the program will do.
if formatFile:
    formatFileMessage = 'Formatting file: ' + formatFile
else:
    formatFileMessage = 'No formatting will be performed.'
print("""
Conducting a removed star search with the following parameters:
    Base search directory: %s
    Destination directory: %s
    Star name list file: %s
    %s""" % (baseDirectory, destDirectory, starList, formatFileMessage))
if startDate:
    print('    Start date: %s' % (startDate))
if stopDate:
    print('    Stop date: %s' % (stopDate))
if skipBadFormats:
    print('    Program set to skip text files that generate formatting ' +
          'errors.')

# Open up the star name list and extract all star names.  Ignore blank lines
# and duplicates, and convert star names to lowercase.  open() reports
# failure by raising IOError (it never returns a false value), so that is
# what must be handled here.
print('\nParsing star name list file...')
try:
    starListHandle = open(starList, 'r')
except IOError:
    _exitWithError('Could not open the star name list file for reading.')
try:
    for starName in starListHandle:
        starName = starName.strip().lower()
        if not starName or starName in starNames:
            continue
        starNames.append(starName)
finally:
    starListHandle.close()
if not starNames:
    _exitWithError('No star names entered in the star name list file.')
print('    Star names located in star name list:')
sys.stdout.write('    ')
print(str(starNames))

# Set used for the per-line membership test in the search loop below;
# starNames itself is kept as a list to preserve the order of the star list.
starNameSet = set(starNames)

# Is there a formatting file?  If so, read it in and create its commands.
if formatFile:
    print('Parsing formatting file...')

    # Table mapping command names to command classes.
    #
    # NOTE TO FUTURE DEVELOPERS: If you create a new command class, in
    # addition to ensuring that the module containing the new command class
    # is included in this module, add an entry for your command to this
    # table to ensure that users can use your command class.
    commandClasses = {
        'extract': ExtractCommand,
        'smei2days': Smei2daysCommand,
        'space': SpaceCommand,
        'sumfloatextract': SumFloatExtractCommand,
        'multiplyfloatextract': MultiplyFloatExtractCommand,
    }

    try:
        formatFileHandle = open(formatFile, 'r')
    except IOError:
        _exitWithError('Could not open the formatting file for reading.')
    numberOfErrors = 0
    try:
        for command in formatFileHandle:
            # Ignore blank lines and comment lines!
            command = command.strip()
            if not command or command[0] in (';', ':'):
                continue

            # Extract the command name and parse its arguments.
            commandList = [field.strip() for field in command.split(',')]
            commandName = commandList[0]
            if not commandName:
                print('ERROR: Command encountered without any command name ' +
                      'specified: ' + command)
                numberOfErrors += 1
                continue
            if commandName not in commandClasses:
                print('ERROR: Command encountered with unknown command ' +
                      'name: ' + command)
                numberOfErrors += 1
                continue

            # Create a new command class instance for the command.
            formattingCommands.append(
                commandClasses[commandName](commandList[1:]))
    finally:
        # Close the formatting file even if a command class rejects its
        # arguments by raising an exception.
        formatFileHandle.close()

    # If errors were encountered, terminate the program.
    if numberOfErrors:
        print(str(numberOfErrors) + ' error(s) encountered.')
        sys.exit(1)

    # Create the header, which will be the first line in every outputted
    # text file.  This is essentially a line containing column captions.
    formattingHeader = ''.join([command.getFieldCaption()
                                for command in formattingCommands
                                if isinstance(command,
                                              CaptionedFormattingCommand)])

# Begin the search by opening up the necessary subdirectories and listing
# their contents.
print('Beginning search and extraction...')
for subdirectory in subdirectories:
    # Enter the current subdirectory.
    directory = os.path.expanduser(os.path.expandvars(os.path.normpath(
        os.path.join(baseDirectory, subdirectory))))
    if not os.path.exists(directory):
        print('WARNING: Directory ' + directory + ' does not exist.  ' +
              'Skipping.')
        continue
    if not os.path.isdir(directory):
        print('WARNING: Directory ' + directory + ' is not a directory.  ' +
              'Skipping.')
        continue

    # Find all of the files in the subdirectory and loop through them.
    fileList = os.listdir(directory)
    fileList.sort()
    print('Searching subdirectory ' + subdirectory + ' (' +
          str(len(fileList)) + ' files total)')
    fileCounter = 0
    starEntries = {}
    oldPercentageDone = 0
    errorGeneratingFiles = 0
    for fileIndex, fileName in enumerate(fileList):
        # Display our progress through the directory as a percentage.
        percentageDone = int((fileIndex + 1.0) / len(fileList) * 100)
        if percentageDone % 5 == 0 and percentageDone != oldPercentageDone:
            oldPercentageDone = percentageDone
            sys.stdout.write(str(percentageDone) + '% ')
            sys.stdout.flush()

        # Filter the files for the PNT files we're looking for.  In other
        # words, if the current file is not a PNT file, then skip it.
        if not (fileName.endswith('.txt') and len(fileName) > 6 and
                fileName[0:6] in fileNameHeaders):
            continue

        # If the current file does not fall within the date range provided
        # by the user (if the user provided such a range), then skip it.
        # Standard SMEI dates are fixed-width, so comparing them as strings
        # orders them chronologically.
        fileDate = fileName[6:21]
        if startDate and fileDate < startDate:
            continue
        if stopDate and fileDate > stopDate:
            continue

        # Open the file.  Unreadable files are skipped with a warning.
        pntFilePath = os.path.join(directory, fileName)
        try:
            pntFile = open(pntFilePath, 'r')
        except IOError:
            print('\n    WARNING: Unable to open ' + pntFilePath +
                  ' for reading.  Skipping.')
            continue
        fileCounter = fileCounter + 1

        # Cycle through the lines in the current PNT file.
        try:
            for lineEntry in pntFile:
                # Skip lines beginning with a semicolon or colon.
                lineEntry = lineEntry.strip()
                if lineEntry.startswith(';') or lineEntry.startswith(':'):
                    continue

                # The star name is the first starNameFieldSize characters
                # of the line.  Skip lines for stars we were not asked for.
                starName = lineEntry[0:starNameFieldSize].strip().lower()
                if starName not in starNameSet:
                    continue

                # Gut the star name out of the line entry.
                lineEntry = lineEntry[starNameFieldSize:]

                # Do we have formatting commands to process on the line
                # entry?  If we do, parse the line by whitespace and run the
                # commands on the parsed data.  The concatenated results
                # replace the line entry.
                if formattingCommands:
                    parsedLineEntry = lineEntry.strip().split()
                    try:
                        lineEntry = ''.join(
                            [command.execute(parsedLineEntry)
                             for command in formattingCommands])
                    except FormatExecutionException:
                        if skipBadFormats:
                            # Abandon the rest of this PNT file; lines
                            # already collected from it are retained.
                            errorGeneratingFiles += 1
                            break
                        # Re-raise with the original traceback intact.
                        raise

                # Add the line of data, formatted or not, to the list of
                # lines of data for the star associated with the data.
                addStarEntry(starName, lineEntry)
        finally:
            # Close the file, even when a formatting error propagates.
            pntFile.close()

    # Now that we're done examining all of the files in the subdirectory,
    # output the results to text files named after the stars.
    print('\n    ' + str(fileCounter) + ' files examined.')
    if errorGeneratingFiles:
        print('    ' + str(errorGeneratingFiles) + ' files generated ' +
              'formatting errors.')
    for starName in starNames:
        # Open a file for the current star (if there are lines stored in its
        # associated list).  Walking starNames instead of the hashtable keys
        # writes the files in star list order.
        entries = starEntries.get(starName)
        if not entries:
            continue
        outputFilePath = os.path.join(destDirectory,
                                      mutateStarName(starName) +
                                      subdirectory + '.txt')
        try:
            outputFile = open(outputFilePath, 'w')
        except IOError:
            print('    WARNING: Unable to open ' + outputFilePath +
                  ' for writing.')
            continue

        try:
            # If there is a formatting header (i.e., a string containing
            # column captions), write it first.
            if formattingHeader:
                outputFile.write(formattingHeader + '\n')

            # Write the lines of data to the file.
            for entry in entries:
                outputFile.write(entry + '\n')
        finally:
            outputFile.close()
        print('    Wrote star entries file ' + outputFilePath +
              ' for star ' + starName)

# Done!
print('\nDone\n')