;+ ; NAME: ; strposn ; PURPOSE: ; Locates a substring in an array of strings and returns ; string arrays for preceding and trailing parts. By default, the ; substring is removed, but can be retained using keywords ; TrailingFront or LeadingBack ; CATEGORY: ; string manipulation ; CALLING SEQUENCE: FUNCTION strposn, Strings, SubStr, Front, Back, $ Last = Last , $ frontdefault = FrontDefault , $ trailingfront = TrailingFront , $ leadingback = LeadingBack ; INPUTS: ; Strings string array or scalar (read-only) ; Substr string scalar or array (read-only) ; If not specified the scalar Substr=',' (comma) is used ; Substring(s) to be searched for. If more than one ; substring is specified to position of the string with ; the lowest (highers if /last is set) index is returned. ; OPTIONAL INPUT PARAMETERS: ; /last if set, search is for the last occurence of Substr ; (similar to rstrpos; default is to search for the ; first occurence, similar to strpos) ; /frontdefault if set, strings which do not contain Substr ; are returned in the Front array ; (by default these strings will be returned in Back) ; /trailingfront if set, and Substr is found it will be attached to ; the end of the Front array (i.e. SubStr is used as a ; terminator). ; /leadingback if set, and Substr is found it will be attached to the ; front of the Back array (i.e. Substr is used as a prefix) ; OUTPUTS: ; Index long integer array of same structure as Strings ; Contains the starting positions of SubStr (set to -1 ; for strings not containing SubStr) i.e. same as for ; strpos and rstrpos ; Front string variable of same structure as Strings. ; Contains the sections of the strings preceding SubStr ; - includes Substr if Trailingfront is set ; - includes strings not containing SubStr if ; Frontdefault is set ; Back string variable of same structure as Strings. ; Contains the sections of the strings following SubStr ; - includes Substr if Leadingback is set ; - includes strings not containing SubStr unless ; Frontdefault is set ; OPTIONAL OUTPUT PARAMETERS: ; (None) ; INCLUDE: @compile_opt.pro ; On error, return to caller ; CALLS: ; InitVar, SyncDims ; SIDE EFFECTS: ; TrailingFront and LeadingBack are mutually exclusive. If both are ; present TrailingFront takes precedence. ; RESTRICTIONS: ; None ; PROCEDURE: ; MODIFICATION HISTORY: ; DEC-1997, Paul Hick (UCSD/CASS) ; MAR-2003, Paul Hick (UCSD/CASS) ; Improved efficiency. Removed one of the two remaining explicit DO-loops, ; made the second more compact. ; DEC-2007, Paul Hick (UCSD/CASS; pphick@ucsd.edu) ; Argument Substr can now be an array (used to be scalar only) ;- InitVar, SubStr, ',' ; Set default SubStr InitVar, Last, /key ; First/last occurrence toggle IF n_elements(SubStr) GT 1 THEN BEGIN i_str = strposn(Strings, SubStr[0], Front, Back, $ Last = Last , $ frontdefault = FrontDefault , $ trailingfront = TrailingFront , $ leadingback = LeadingBack) FOR i=1,n_elements(SubStr)-1 DO BEGIN j_str = strposn(Strings, SubStr[i], FrontTmp, BackTmp, $ Last = Last , $ frontdefault = FrontDefault , $ trailingfront = TrailingFront , $ leadingback = LeadingBack) p = where(j_str NE -1 AND i_str EQ -1) q = where(j_str NE -1 AND i_str NE -1) IF p[0] NE -1 THEN BEGIN i_str[p] = j_str[p] Front[p] = FrontTmp[p] Back [p] = BackTmp [p] ENDIF IF q[0] NE -1 THEN BEGIN j1 = where(j_str[q] LT i_str[q],complement=j2) CASE Last OF 0: IF j1[0] NE -1 THEN p = q[j1] ELSE p = j1 1: IF j2[0] NE -1 THEN p = q[j2] ELSE p = j2 ENDCASE IF p[0] NE -1 THEN BEGIN i_str[p] = j_str [p] Front[p] = FrontTmp[p] Back [p] = BackTmp [p] ENDIF ENDIF ENDFOR RETURN, i_str ENDIF l_sub = strlen(SubStr) bSubStr = byte(SubStr) InitVar, FrontDefault, /key ; Front/back toggle for strings with no SubStr Separate = 0 ; Switch for output of Substr IF keyword_set(LeadingBack ) THEN Separate = 2 IF keyword_set(TrailingFront) THEN Separate = 1 s_str = size(Strings) ; Needed to restore output array structure IF s_str[0] NE 0 THEN Strings = reform(Strings, /overwrite) n_str = n_elements(Strings) ; # Strings i_str = replicate(-1,n_str) ; Initialize index array l_str = strlen(Strings) ; String lengths l_str_max = max(l_str) ; Maximum string length BStrings = byte(Strings) ; Convert to byte array e = where ( BStrings EQ bSubStr[0] ) IF l_sub GT 1 AND e[0] NE -1 THEN BEGIN ; SubStr has more than 1 char i = 0 ; The following is kinda kludgy, but it does the job REPEAT BEGIN i += 1 ; Next char in SubStr p = where ( e/l_str_max EQ (e+i)/l_str_max ) ; e and e+i must be in same string IF p[0] NE -1 THEN BEGIN e = e[p] ; Retain indices in BStrings p = where ( BStrings[e+i] EQ bSubStr[i] ) ENDIF IF p[0] EQ -1 THEN e = -1 ELSE e = e[p] ; Retain indices in BStrings ENDREP UNTIL i EQ l_sub-1 OR p[0] EQ -1 ENDIF CASE e[0] EQ -1 OF 0: BEGIN ; Substrings found b = e/l_str_max ; Index of strings with substrings ; Strings with multiple substrings are listed more than once CASE Last OF 0: p = n_elements(e)-1-reverse (uniq(reverse(b))); Search for first occurrence 1: p = Uniq(b) ; Search for last occurrence ENDCASE b = b[p] ; Index of strings with substring (no double entries) e = e[p] ; 1D location of substrings in BStrings with_sub = b i_str[b] = e mod l_str_max ; Position in string of substring ; Stays -1 for strings with no substring Mask = make_array( size=size(BStrings), value=FrontDefault ) IF FrontDefault THEN Mask[*,b] = 0B ; Strings containing substring are masked to 0 b *= l_str_max ; 1D location of start of string in BStrings p = where(b NE e) ; Non-zero-length leading parts BackTrouble = p[0] NE -1 IF BackTrouble THEN BEGIN b = b[p] ; 1D location of leading part e = e[p] ; 1D location of substring first_pos = e ; Needed to fix BackTrouble (see below) e -= b ; Length of string leading part ; Typically array 'e' will only contain a very limited number of different ; values. Loop over the unique values instead of over all 'e' values individually e_uniq = e[uniq(e,sort(e))] n_uniq = n_elements(e_uniq) FOR i=0,n_uniq-1 DO BEGIN e_i = where(e eq e_uniq[i],n_i) p_i = replicate(1,e_uniq[i])#b[e_i]+lindgen(e_uniq[i])#replicate(1,n_i) p_i = reform(p_i, e_uniq[i]*n_i, /overwrite) IF i EQ 0 THEN p = p_i else p = [p, p_i] ENDFOR IF n_uniq NE 0 THEN p = p[sort(p)] ; This is where most time is spent ;pp = b[0]+lindgen(e[0]) ;FOR i=1,n_elements(b)-1 DO pp = [pp,b[i]+lindgen(e[i])] ;if ( where(p ne pp) )[0] ne -1 then message, 'oops' Mask[p] = 1B ; Leading parts are masked to 1 ENDIF Front = string(BStrings*Mask) ;========= ; Can't do ; Back = string(BStrings*(1B-Mask)) ; for the trailing part because the null string is returned on each row ; starting with 0B. Hence the following: Back = BStrings*(1B-Mask) IF BackTrouble THEN BEGIN Back[p] = byte(' ') ; Overwrite leading parts with spaces first_char = Back[first_pos] Back[first_pos] = byte('a') ; Overwrite 1st char substring with non-white space ENDIF Back = string(Back) IF BackTrouble THEN BEGIN e = first_pos/l_str_max ; Strings with leading parts that are now white space Back[e] = strtrim(Back[e],1); Trim the leading white space Back[e] = string(transpose(first_char))+strmid(Back[e],1) ; Restore 1st char of substring ENDIF ;=========== ; At this point the substring is attached to the front of Back ; Separate = 0: remove substring ; Separate = 1: substring is moved to end of Front ; Separate = 2: substring stays at front of Back IF Separate NE 2 THEN BEGIN Back[with_sub] = strmid( Back[with_sub], l_sub ) IF Separate EQ 1 THEN Front[with_sub] += SubStr ENDIF END 1: BEGIN ; No substrings found CASE FrontDefault OF 0: BEGIN Front = strarr(n_str) Back = Strings END 1: BEGIN Front = Strings Back = strarr(n_str) END ENDCASE END ENDCASE SyncDims, Strings, size=s_str SyncDims, Front , size=s_str ; Restore input array structure. SyncDims, Back , size=s_str SyncDims, i_str , size=s_str RETURN, i_str & END