;+
; NAME:
;   FindAllFiles
; PURPOSE:
;   Extended version of FindFile for locating files satisfying
;   a list of file specs in a list of directories.
;   FindAllFiles is intended to be robust; it is slow compared
;   to a direct call to findfile.
; CATEGORY:
;   Environment
; CALLING SEQUENCE:
FUNCTION FindAllFiles, FilesIn      , $
    paths           = PathsIn       , $
    count           = count         , $
    separator       = Separator     , $
    shortnames      = ShortNames    , $
    nodirectories   = NoDirectories , $
    recursive       = Recursive     , $
    symlink         = symlink       , $
    excl_files      = excl_files    , $
    excl_paths      = excl_paths    , $
    excl_recursive  = excl_recursive, $
    forcecd         = ForceCD       , $
    ls_style        = ls_style
; INPUTS:
;   Files           list of file specifications, specified as
;                   a list separated by Separator, or a string array.
;                   If omitted (or if an entry has no file name part)
;                   the wildcard '*' is used for the search.
; OPTIONAL INPUT PARAMETERS:
;   paths=Paths     list of directories to be searched, specified
;                   as a separated list, or a string array
;                   (if omitted then the current directory is assumed)
;
;   separator=Separator
;                   separator used between entries in Files and Paths
;                   (default: os_separator(/path)). The same separator
;                   is used for excl_files and excl_paths.
;   /shortnames     if set then only file name, type (and version
;                   on vms) are returned (default: return fully
;                   qualified names). Entries for which the file name
;                   part is empty are dropped.
;   /recursive      search recursively through all subdirectories
;   /symlink        (only on Unix/Linux systems when /recursive is set)
;                   by default, a recursive search does not descend into
;                   directories that are symbolic links. If /symlink is
;                   set then it will (see href="IsSymLink").
;   /nodirectories  discards directories, i.e. only regular files are
;                   returned.
;   /forcecd        THIS HAS ONLY BEEN TESTED ON LINUX.
;                   Historical workaround for the IDL findfile function:
;                   cd into the directory first, call findfile with only
;                   the file name wildcard, and prefix the directory to
;                   the result. Since findfile was replaced by file_search
;                   this keyword is no longer used here; it is only
;                   passed on to FindAllSubDirs.
;   /ls_style       was passed to href=findfile_fix=. The call to
;                   findfile_fix is currently disabled, so this keyword
;                   has no effect.
;
;   Three additional keywords allow files and/or paths to be excluded:
;
;   excl_files=excl_files
;                   list of file specifications similar to 'Files'
;   excl_paths=excl_paths
;                   list of directories similar to 'Paths'
;   /excl_recursive
;                   triggers a recursive search similar to '/recursive'
;
;   These three keywords are used as input for a recursive call to
;   FindAllFiles (together with Separator, /shortnames, /symlink and
;   /forcecd). The result is subtracted from the files found using
;   'Files' and 'Paths'. If /shortnames is set the comparison is made on
;   the short names, since the directory part is no longer available.
;
; OUTPUTS:
;   Result          string array with filenames; if no files are found then
;                   the null string is returned (Result='')
; OPTIONAL OUTPUT PARAMETERS:
;   count=count     number of files located
; INCLUDE:
    @compile_opt.pro        ; On error return to caller
; CALLS:
;   InitVar, IsType, os_separator, SetFileSpec, GetFileSpec, FindAllSubDirs
;   unique_only, where_common, FindAllFiles
; SIDE EFFECTS:
;   > windows: Directories . and .. are omitted
;       Trailing delimiter for directories are stripped off
;   > If both Files and Paths are specified as arrays then Separator is
;       not used.
;   > The keyword variables ShortNames, NoDirectories, Recursive, ForceCD
;       and Separator are initialized with InitVar.
; PROCEDURE:
;   Separator defaults to os_separator(/path).
;   Each entry in the Files list can contain a directory specification;
;   in that case the Paths keyword is not used for that entry.
;   Duplicate entries are removed from the result with unique_only.
; MODIFICATION HISTORY:
;   ???-????, Paul Hick (UCSD/CASS)
;   AUG-2000, Paul Hick (UCSD/CASS)
;       Added /recursive keyword
;   JUN-2001, Paul Hick (UCSD/CASS)
;       Added /symlink
;   JAN-2002, Paul Hick (UCSD/CASS)
;       Added exclusion keywords
;   SEP-2002, Paul Hick (UCSD/CASS)
;       On Linux output from the IDL findfile function is now filtered
;       through findfile_fix to deal with directories.
;       (this may actually be necessary for all Unix flavors)
;       Added /forcecd and /ls_style keywords. Added check to exclude
;       wildcards in the directory paths.
;   MAR-2003, Paul Hick (UCSD/CASS)
;       Improved processing of FileIn=''. This now should handle directories
;       with lots of files better (at least on Linux) by avoiding the use
;       of an explicit wildcard (*.*) in the call to findfile.
;   FEB-2004, Paul Hick (UCSD/CASS)
;       Changed check for !version.os to check for !version.os_family.
;       Makes this hopefully a bit more generally useful.
;   OCT-2004, Paul Hick (UCSD/CASS)
;       Modified handling of a path specification without a filename.
;       Windows didn't handle the Linux solution correctly.
;   OCT-2006, Paul Hick (UCSD/CASS)
;       Added forcecd keyword to FindAllSubDirs call
;   JUL-2007, Paul Hick (UCSD/CASS; pphick@ucsd.edu)
;       Replaced findfile by file_search
;       Keyword /forcecd is now ignored.
;-

; An absent file spec is represented by the null string; it is turned into
; the wildcard '*' at the file_search call below.
IF n_elements(FilesIn) EQ 0 THEN Files = ''        ELSE Files = FilesIn
IF n_elements(PathsIn) EQ 0 THEN cd, current=Paths ELSE Paths = PathsIn

InitVar, ShortNames   , /key
InitVar, NoDirectories, /key
InitVar, Recursive    , /key
InitVar, ForceCD      , /key
InitVar, Separator    , os_separator(/path)

delimiter = os_separator(/dir)

; Convert single element list separated by Separator to an array.

IF n_elements(Files) EQ 1 THEN Files = strtok(Files[0], Separator, /extract)
IF n_elements(Paths) EQ 1 THEN Paths = strtok(Paths[0], Separator, /extract)

FOR iSpec=0L,n_elements(Files)-1 DO BEGIN       ; Loop over all file specifications

    ; Check whether file specification includes an explicit directory.

    SetFileSpec, Files[iSpec]
    Path = GetFileSpec(upto='Directory')        ; Pick up directory
    File = GetFileSpec(from='FileName' )        ; Pick up file name (could include wildcard)
    NoPath = Path EQ ''                         ; No directory specified

    ; If a directory is specified search the specified directory only.
    ; If not search all directories in Paths.

    IF NoPath THEN PathsUsed = Paths ELSE PathsUsed = Path
    nPathsUsed = n_elements(PathsUsed)

    ; Loop over all directories in PathsUsed. A REPEAT loop is used instead
    ; of a FOR loop because PathsUsed (and nPathsUsed) grow inside the loop
    ; when /recursive is set.

    iPath = -1

    REPEAT BEGIN                                ; Loop over all directories

        iPath = iPath+1
        Path = filepath(root=strtrim(PathsUsed[iPath],2),'')    ; Add trailing delimiter

        ; If /recursive is set check Path for any directories.
        ; If found insert them in PathsUsed right behind the current entry.

        IF Recursive THEN BEGIN
            Add = FindAllSubDirs(Path, count=iAdd, symlink=symlink, forcecd=ForceCD)
            IF iAdd NE 0 THEN BEGIN
                CASE iPath EQ nPathsUsed-1 OF
                0: PathsUsed = [PathsUsed[0:iPath],Add,PathsUsed[iPath+1:nPathsUsed-1]]
                1: PathsUsed = [PathsUsed[0:iPath],Add]
                ENDCASE
                nPathsUsed = nPathsUsed+iAdd
            ENDIF
        ENDIF

        IF strpos(Path,'*') NE -1 THEN BEGIN

            message, /info, 'no wildcards in path allowed: '+Path
            tmp = ''

        ENDIF ELSE BEGIN

            ; No file name specified: list everything in the directory.
            ; (This file_search call replaced the earlier findfile-based
            ; code, including its /forcecd branch; see MODIFICATION HISTORY.)

            CASE File EQ '' OF
            0: tmp = file_search( Path+File, /fully_qualify_path, /mark_directory )
            1: tmp = file_search( Path+'*' , /fully_qualify_path, /mark_directory )
            ENDCASE

        ENDELSE

        IF tmp[0] NE '' THEN BEGIN

            CASE !version.os_family OF

            ; The findfile_fix call (which handled directories and the
            ; contents of subdirectories on Unix) is disabled.

            'unix':     ;tmp = findfile_fix(Path, File, tmp, ls_style=ls_style)

            'Windows': BEGIN

                ; Drop .\ and ..\ (only checked at the start of the list)

                IF strmid(tmp[0],strlen(tmp[0])-2,2) EQ '.' +delimiter THEN $
                    IF n_elements(tmp) EQ 1 THEN tmp = '' ELSE tmp = tmp[1:*]
                IF strmid(tmp[0],strlen(tmp[0])-3,3) EQ '..'+delimiter THEN $
                    IF n_elements(tmp) EQ 1 THEN tmp = '' ELSE tmp = tmp[1:*]

                ; If a file type is specified without a wildcard then the search may
                ; give unexpected results: E.g. when searching for *.nic files then
                ; file names like *.nic_bad are also returned, i.e. with a file
                ; type starting with .nic but with some extra letters. These are
                ; filtered out.

                Ext = strlowcase(GetFileSpec(File, part='type'))
                IF Ext NE '' AND strpos(Ext,'*') EQ -1 THEN BEGIN
                    iSame = where( strlowcase(GetFileSpec(tmp, part='type')) EQ Ext )
                    IF iSame[0] EQ -1 THEN tmp = '' ELSE tmp = tmp[iSame]
                ENDIF

                IF NOT NoDirectories AND tmp[0] NE '' THEN BEGIN
                    ; Remove trailing \ from directory entries. The loop index is a
                    ; long integer so very large directory listings are handled too.
                    FOR iEntry=0L,n_elements(tmp)-1 DO BEGIN
                        ilen = strlen( tmp[iEntry] )
                        IF strmid(tmp[iEntry],ilen-1,1) EQ delimiter THEN   $
                            tmp[iEntry] = strmid(tmp[iEntry],0,ilen-1)
                    ENDFOR
                ENDIF

            END

            ; Any other OS family: use the file_search output as is instead
            ; of aborting with a CASE-without-match error.

            ELSE:

            ENDCASE

            ; Drop directory if required

            IF ShortNames THEN tmp = GetFileSpec(tmp, from='FileName')

            IF NoDirectories THEN BEGIN
                tmpname = GetFileSpec(tmp, from='FileName')
                iFile = where(tmpname NE '.' AND tmpname NE '')
                CASE iFile[0] EQ -1 OF
                0: tmp = tmp[iFile]
                1: tmp = ''
                ENDCASE
            ENDIF

            ; Collect result in AllFiles. Select all non-empty entries instead of
            ; testing only tmp[0]: after shortening, any entry may have become
            ; empty (presumably directories, which carry no file name part), and
            ; an empty first entry must not discard the valid names behind it.

            iKeep = where(tmp NE '', nKeep)
            IF nKeep NE 0 THEN BEGIN
                tmp = tmp[iKeep]
                CASE IsType(AllFiles, /defined) OF
                0: AllFiles = tmp
                1: AllFiles = [AllFiles, tmp]
                ENDCASE
            ENDIF

        ENDIF

    ENDREP UNTIL iPath EQ nPathsUsed-1

ENDFOR

count = n_elements(AllFiles)                    ; # files found

; If no files were found set AllFiles to the null string.
; If files were found then remove duplicate entries.

CASE count NE 0 OF
0: AllFiles = ''
1: BEGIN
    AllFiles = AllFiles[unique_only(AllFiles)]
    count = n_elements(AllFiles)
END
ENDCASE

IF count GT 0 THEN BEGIN

    ; If one of the exclude keywords is set use them for a recursive call
    ; to this function. Subtract the result from the AllFiles array.
    ; Separator and ShortNames are passed along so that the exclusion lists
    ; are parsed the same way as Files and Paths, and so that the names that
    ; are compared have the same form (short or fully-qualified) as AllFiles.

    IF n_elements(excl_files) NE 0 OR n_elements(excl_paths) NE 0 THEN BEGIN

        ExclFiles = FindAllFiles(excl_files, paths=excl_paths, $
            recursive   = excl_recursive, $
            symlink     = symlink       , $
            separator   = Separator     , $
            shortnames  = ShortNames    , $
            count       = nExcl         , $
            forcecd     = ForceCD)

        IF nExcl GT 0 THEN BEGIN                ; Found files to be excluded

            ; Only the 'absent' output of where_common is used here: it is
            ; taken to index the entries of AllFiles not present in ExclFiles.

            iCommon = where_common(AllFiles, ExclFiles, absent=iLeft)

            CASE iLeft[0] EQ -1 OF              ; No more files left after exclusion
            0: BEGIN                            ; Pick up remaining files
                AllFiles = AllFiles[iLeft]
                count = n_elements(AllFiles)
            END
            1: BEGIN
                AllFiles = ''
                count = 0
            END
            ENDCASE

        ENDIF

    ENDIF

ENDIF

RETURN, AllFiles  &  END