;+ ; NAME: ; flt_string ; PURPOSE: ; Extract numbers from string and put them in floating point array ; CATEGORY: ; String processing ; CALLING SEQUENCE: FUNCTION flt_string, INPUT , $ exponent = exponent , $ xfora = xfora , $ double_precision= double_precision , $ integer = integer , $ strvec = StrVEC , $ fmt = fmt , $ numfmt = numfmt , $ lenfmt = lenfmt , $ strcrumbs = strcrumbs , $ numcrumbs = numcrumbs , $ lencrumbs = lencrumbs , $ modify_format = modify_format , $ mask_string = mask_string , $ is_number = is_number , $ is_integer = is_integer , $ positive_only = positive_only ; INPUTS: ; INPUT string (e.g. 89/1/12) ; OPTIONAL INPUTS: ; /exponent if set and nonzero, exponentials of type 'D' and 'E' are ; also interpreted ; /double_precision ; if set then a double precision array is returned ; (default: single precision) ; /xfora ; /modify_format if set, the format returned in 'fmt' is run ; through the function href=flt_format= to ; shorten the format string. ; mask_string=mask_string ; scalar; type: string ; string of '0' and '1' with '1' in chars that ; are explicitly NOT part of a number. ; (the char will be part of an A or X format in the ; resulting format specifier returned in 'fmt') ; /is_number test string; returns non-zero integer for strings that do not have ; any chars that are not part of a number ; (i.e. for which numcrumbs=0). The non-zero value ; counts the numbers found in the string. ; /is_integer test string; as /is_number, but tests for integers ; /positive_only assumes that all numbers are positive ; (effectively this ignores the minus character ; "-" as not a valid part of the mantissa; ; it can still occur in an exponent). ; OUTPUTS: ; RtnVal floating array (e.g. [89,1,12]) ; StrVEC string array (array FltVEC before conversion to float) ; fmt format string matching the input string ; numfmt # valid numbers located in the input string ; lenfmt # chars taken up by the numfmt numbers in the input string ; strcrumbs string array with part of input string not translated ; into numbers ; numcrumbs # elements in strcrumbs ; lencrumbs # total length of all elements in strcrumbs summed ; INCLUDE: @compile_opt.pro ; On error, return to caller ; CALLS: ; boost, BadValue, flt_string, flt_format, InitVar ; PROCEDURE: ; Checks each character against a list of valid characters. ; Processing characters sequentially going from first to last, ; the largest possible substrings representing valid numbers ; are extracted. Only integer exponents are accepted ; (if the keyword /exponent is set). ; MODIFICATION HISTORY: ; Written Jan 1989 by DMZ (ARC) ; Converted to V2 Dec 1990 by DMZ (ARC) ; OCT-1990, Paul Hick (ARC) ; Expanded to accept exponentials, ; ?-1993, Paul Hick (UCSD) ; Added construction of format specifiers ; FEB-1995, Paul Hick (UCSD/CASS) ; Added option to pull out the substrings not translated ; into numbers ; MAR-2000, Paul Hick (UCSD/CASS) ; Added /double_precision keyword ; JUL-2002, Paul Hick (UCSD/CASS) ; Fixed problem dealing with formatting of peculiar exponents. ; E.g. '(1.dat)' would return format '(D2.-4,A3)'. ; Now it returns '(D3.0,A2)' ; MAY-2004, Paul Hick (UCSD/CASS) ; Changed strmid(var,2,999) to strmid(var,2) ; (extracts to end of string 'var') ; SEP-2004, Paul Hick (UCSD/CASS) ; Added call to flt_format to shorten the format specifier ; returned in fmt ; DEC-2008, Paul Hick (UCSD/CASS; pphick@ucsd.edu) ; Added /positive_only keyword ;- n = n_elements(INPUT) IF n GT 1 THEN BEGIN destroyvar, FltVEC, StrVEC, fmt, numfmt destroyvar, lenfmt, strcrumbs, numcrumbs, lencrumbs FOR i=0L,n-1 DO BEGIN FltVECi = flt_string(INPUT[i], $ exponent = exponent , $ xfora = xfora , $ double_precision= double_precision, $ integer = integer , $ strvec = StrVECi , $ fmt = fmti , $ numfmt = numfmti , $ lenfmt = lenfmti , $ strcrumbs = strcrumbsi , $ numcrumbs = numcrumbsi , $ lencrumbs = lencrumbsi , $ modify_format = modify_format , $ is_number = is_number , $ is_integer = is_integer , $ positive_only = positive_only) boost, /push, FltVEC, FltVECi boost, /push, StrVEC, StrVECi boost, /push, fmt , fmti boost, /push, numfmt, numfmti boost, /push, lenfmt, lenfmti boost, /push, strcrumbs, strcrumbsi boost, /push, numcrumbs, numcrumbsi boost, /push, lencrumbs, lencrumbsi ENDFOR RETURN, FltVEC ENDIF InitVar, positive_only , /key InitVar, is_number , /key InitVar, is_integer , /key InitVar, double_precision , /key InitVar, integer , /key InitVar, exponent , /key InitVar, xfora , /key InitVar, modify_format , /key VALID_NR = ['1','2','3','4','5','6','7','8','9','0'] VALID_NRDOT = [VALID_NR,'.'] VALID_EXP = [VALID_NR,'+','-'] CASE positive_only OF 0: VALID_FULL = [VALID_NR,'.','+','-'] 1: VALID_FULL = [VALID_NR,'.','+'] ENDCASE cStr = strtrim(INPUT[0]) Len = strlen(cStr) I = -1 InitVar, mask_string, '' nmask = strlen(mask_string) FltVec = BadValue( integer ? 0LL : double_precision ? 0.0d0 : 0.0 ) StrVEC = '' fmt = '*' numfmt = 0 fmtcnt = 0 lenfmt = 0 numcrumbs = 0 lencrumbs = 0 LAST = I ; End of previous number BLANKS = 0 dot_found = 0 FIRST_CHAR: I += 1 IF I GE Len THEN goto, DONE IF I LT nmask THEN BEGIN IF strmid(mask_string,I,1) EQ '1' THEN BEGIN BLANKS = 0 goto, FIRST_CHAR ENDIF ENDIF CH = strmid(cStr,I,1) IF CH EQ ' ' THEN BEGIN BLANKS += 1 goto, FIRST_CHAR ENDIF A = where(CH EQ VALID_FULL,cNum) ; First char must be +,-,. or digit IF cNum EQ 0 THEN BEGIN BLANKS = 0 goto, FIRST_CHAR ENDIF FIRST = -1 IF CH EQ '+' OR CH EQ '-' THEN BEGIN J = I+1 ; Position following +,- IF J GE Len THEN BEGIN I = J goto, DONE ENDIF IF J LT nmask THEN BEGIN IF strmid(mask_string,J,1) EQ '1' THEN BEGIN BLANKS = 0 goto, FIRST_CHAR ENDIF ENDIF CH = strmid(cStr,J,1) A = where(CH EQ VALID_NRDOT,cNum) ; +,- must be followed by . or digit IF cNum EQ 0 THEN BEGIN BLANKS = 0 goto, FIRST_CHAR ENDIF FIRST = I ; Position of +,- (start of number) I = J ; Position of . or digit ENDIF IF CH EQ '.' THEN BEGIN J = I+1 ; Position following . IF J GE Len THEN BEGIN I = J goto, DONE ENDIF IF J LT nmask THEN BEGIN IF strmid(mask_string,J,1) EQ '1' THEN BEGIN BLANKS = 0 goto, FIRST_CHAR ENDIF ENDIF CH = strmid(cStr,J,1) A = where(CH EQ VALID_NR,cNum) ; . must be followed by number IF cNum EQ 0 THEN BEGIN BLANKS = 0 goto, FIRST_CHAR ENDIF dot_found = 1 DOT = I ; Position of . fmtchar = 'F' IF FIRST EQ -1 THEN FIRST = I ; Start of number I = J ; Position of digit ENDIF ELSE $ DOT = -1 IF FIRST EQ -1 THEN FIRST = I ; Start of number FIRST -= BLANKS ; Include blanks preceding number in format BLANKS = 0 ; Clear blanks counter IF FIRST GT LAST+1 THEN BEGIN CASE fmtcnt OF 0: fmtl = 0 1: fmt += ','+fmtsub ELSE: fmt += ','+strcompress(fmtcnt,/remove_all)+fmtsub ENDCASE lenfmt += fmtcnt*fmtl A = FIRST-LAST-1 fmtcnt = 0 CASE xfora OF 0: fmt += ',A'+strcompress(A,/remove_all) 1: fmt += ','+strcompress(A,/remove_all)+'X' ENDCASE numcrumbs += 1 lencrumbs += A A = strmid(cStr,LAST+1,A) CASE numcrumbs OF 1 : strcrumbs = A ELSE: strcrumbs = [strcrumbs,A] ENDCASE ENDIF ; At this point the first digit is located (possibly preceded by +,+.,- or -.) ; i.e. a valid number has been located (as yet incomplete) ; FIRST = position where number starts ; DOT = position of . (-1 if no . was found) ; I = position of first digit ; Now start looking for the end of the number LAST_CHAR: I += 1 IF I GE Len THEN goto, NUMBER IF I LT nmask THEN IF strmid(mask_string,I,1) EQ '1' THEN goto, NUMBER CH = strmid(cStr,I,1) IF DOT EQ -1 AND CH EQ '.' THEN BEGIN ; . is acceptable if it's the first one dot_found = 1 DOT = I fmtchar = 'F' goto, LAST_CHAR ENDIF A = where(CH EQ VALID_NR,cNum) ; only digits are acceptable IF cNum GT 0 THEN goto, LAST_CHAR ; if the keyword 'exponent' is not set then the number is considered completed. ; if the keyword 'exponent' is set then continue only if the letter D or E is found ; If the number is complete then I is the position following the number IF NOT exponent OR (CH NE 'D' AND CH NE 'E' AND CH NE 'd' AND CH Ne 'e') THEN goto, NUMBER fmtchar = strupcase(CH) ; Analyze exponent IES = I ; Position of D,E I = IES+1 IF I GE Len THEN goto, NUMBER IF I LT nmask THEN IF strmid(mask_string,I,1) EQ '1' THEN goto, NUMBER CH = strmid(cStr,I,1) A = where(CH EQ VALID_EXP,cNum) ; D,E must be followed by +,- or number IF cNum EQ 0 THEN goto, NUMBER IE = I IF CH EQ '+' OR CH EQ '-' THEN BEGIN ; If IE is position of +,- J = IE+1 ; Position following +,- IF J GE Len THEN goto, NUMBER IF J LT nmask THEN IF strmid(mask_string,J,1) EQ '1' THEN goto, NUMBER CH = strmid(cStr,J,1) A = where(CH EQ VALID_NR,cNum) ; +,- must be followed by number IF cNum EQ 0 THEN goto, NUMBER IE = J ; Position of digit ENDIF I = IE ; first digit in exponent located ; (possibly preceded by + or -) LAST_CHAR_EXP: I += 1 IF I GE Len THEN goto, NUMBER IF I LT nmask THEN IF strmid(mask_string,I,1) EQ '1' THEN goto, NUMBER CH = strmid(cStr,I,1) A = where(CH EQ VALID_NR,cNum) ; only numbers are acceptable IF cNum GT 0 THEN goto, LAST_CHAR_EXP NUMBER: StrVAL = strmid(cStr,FIRST,I-FIRST) CASE 1 OF integer : FltVal = long64(StrVAL) double_precision: FltVAL = double(StrVAL) ELSE : FltVAL = float (StrVAL) ENDCASE IF StrVEC[0] EQ '' THEN BEGIN StrVEC = StrVAL FltVEC = FltVAL ENDIF ELSE BEGIN StrVEC = [StrVEC,StrVAL] FltVEC = [FltVEC,FltVAL] ENDELSE numfmt += 1 IF DOT EQ I-1 THEN BEGIN ; . is last char in number I = DOT ; . will be processed again DOT = -1 ; Cancel dot ENDIF ; (this replaces Fn.0 by In) IF DOT EQ -1 THEN BEGIN ; No dot in number fmtchar = 'I' ; Switch to I format DOT = I-1 ENDIF fmtnew = fmtchar + strcompress(I-FIRST,/remove_all) CASE fmtchar OF 'F': fmtnew += '.' + strcompress(I- DOT-1 > 0,/remove_all) 'D': fmtnew += '.' + strcompress(IES-DOT-1 > 0,/remove_all) 'E': fmtnew += '.' + strcompress(IES-DOT-1 > 0,/remove_all) ELSE:;fmtnew = fmtnew ENDCASE IF fmtcnt EQ 0 THEN BEGIN fmtsub = fmtnew fmtcnt = 1 fmtl = I-FIRST ENDIF ELSE IF fmtnew EQ fmtsub THEN $ fmtcnt += 1 $ ELSE BEGIN CASE fmtcnt OF 1: fmt += ','+fmtsub ELSE: fmt += ','+strcompress(fmtcnt,/remove_all)+fmtsub ENDCASE lenfmt += fmtcnt*fmtl fmtsub = fmtnew fmtl = I-FIRST fmtcnt = 1 ENDELSE I -= 1 LAST = I ; Pos of last character processed goto, FIRST_CHAR DONE: CASE fmtcnt OF 0: fmtl = 0 1: fmt += ','+fmtsub ELSE: fmt += ','+strcompress(fmtcnt,/remove_all)+fmtsub ENDCASE lenfmt += fmtcnt*fmtl IF I GT LAST+1 THEN BEGIN A = I-LAST-1 CASE xfora OF 0: fmt += ',A'+strcompress(A,/remove_all) 1: fmt += ','+strcompress(A,/remove_all)+'X' ENDCASE numcrumbs += 1 lencrumbs += A A = strmid(cStr,LAST+1,A) CASE numcrumbs OF 1 : strcrumbs = A ELSE: strcrumbs = [strcrumbs,A] ENDCASE ENDIF fmt = strmid(fmt,2) IF modify_format THEN fmt = flt_format(fmt) fmt = '('+fmt+')' CASE 1 OF is_number : RETURN, (numcrumbs EQ 0)*finite(FltVec[0])*n_elements(FltVEC) is_integer: RETURN, (1-dot_found)*(numcrumbs EQ 0)*finite(FltVec[0])*n_elements(FltVEC) ELSE : RETURN, FltVEC ENDCASE END