;+
; NAME:
;	flt_string
; PURPOSE:
;	Extract numbers from string and put them in floating point array
; CATEGORY:
;	String processing
; CALLING SEQUENCE:
	FUNCTION flt_string, INPUT				, $ 
		exponent		= exponent			, $ 
		xfora			= xfora 			, $ 
		double_precision= double_precision	, $
		integer			= integer			, $
		strvec			= StrVEC			, $ 
		fmt 			= fmt				, $ 
		numfmt			= numfmt			, $ 
		lenfmt			= lenfmt			, $
		strcrumbs		= strcrumbs 		, $ 
		numcrumbs		= numcrumbs 		, $ 
		lencrumbs		= lencrumbs 		, $
		modify_format	= modify_format 	, $
		mask_string 	= mask_string		, $
		is_number		= is_number			, $
		is_integer		= is_integer		, $
		positive_only	= positive_only
; INPUTS:
;	INPUT			string (e.g. 89/1/12)
; OPTIONAL INPUTS:
;	/exponent		if set and nonzero, exponentials of type 'D' and 'E' are
;					also interpreted
;	/double_precision
;					if set then a double precision array is returned
;					(default: single precision)
;	/xfora
;	/modify_format	if set, the format returned in 'fmt' is run
;					through the function href=flt_format= to
;					shorten the format string.
;	mask_string=mask_string
;					scalar; type: string
;						string of '0' and '1' with '1' in chars that
;						are explicitly NOT part of a number.
;						(the char will be part of an A or X format in the
;						resulting format specifier returned in 'fmt')
;	/is_number		test string; returns non-zero integer for strings that do not have
;						any chars that are not part of a number
;						(i.e. for which numcrumbs=0). The non-zero value
;						counts the numbers found in the string.
;	/is_integer		test string; as /is_number, but tests for integers
;	/positive_only	assumes that all numbers are positive
;						(effectively this ignores the minus character
;						"-" as not a valid part of the mantissa;
;						it can still occur in an exponent).
; OUTPUTS:
;	RtnVal			floating array (e.g. [89,1,12])
;	StrVEC			string array (array FltVEC before conversion to float)
;	fmt				format string matching the input string
;	numfmt			# valid numbers located in the input string
;	lenfmt			# chars taken up by the numfmt numbers in the input string
;	strcrumbs		string array with part of input string not translated
;					into numbers
;	numcrumbs		# elements in strcrumbs
;	lencrumbs		# total length of all elements in strcrumbs summed
; INCLUDE:
	@compile_opt.pro		; On error, return to caller
; CALLS:
;	boost, BadValue, flt_string, flt_format, InitVar
; PROCEDURE:
;	Checks each character against a list of valid characters.
;	Processing characters sequentially going from first to last,
;	the largest possible substrings representing valid numbers
;	are extracted. Only integer exponents are accepted
;	(if the keyword /exponent is set).
; MODIFICATION HISTORY:
;	Written Jan 1989 by DMZ (ARC)
;	Converted to V2 Dec 1990 by DMZ (ARC)
;	OCT-1990, Paul Hick (ARC)
;		Expanded to accept exponentials,
;	?-1993, Paul Hick (UCSD)
;		Added construction of format specifiers
;	FEB-1995, Paul Hick (UCSD/CASS)
;		Added option to pull out the substrings not translated
;		into numbers
;	MAR-2000, Paul Hick (UCSD/CASS)
;		Added /double_precision keyword
;	JUL-2002, Paul Hick (UCSD/CASS)
;		Fixed problem dealing with formatting of peculiar exponents.
;		E.g. '(1.dat)' would return format '(D2.-4,A3)'.
;		Now it returns '(D3.0,A2)'
;	MAY-2004, Paul Hick (UCSD/CASS)
;		Changed strmid(var,2,999) to strmid(var,2)
;		(extracts to end of string 'var')
;	SEP-2004, Paul Hick (UCSD/CASS)
;		Added call to flt_format to shorten the format specifier
;		returned in fmt
;	DEC-2008, Paul Hick (UCSD/CASS; pphick@ucsd.edu)
;		Added /positive_only keyword
;-

n = n_elements(INPUT)

IF n GT 1 THEN BEGIN

	destroyvar, FltVEC, StrVEC, fmt, numfmt
	destroyvar, lenfmt, strcrumbs, numcrumbs, lencrumbs

	FOR i=0L,n-1 DO BEGIN

		FltVECi = flt_string(INPUT[i], $
			exponent		= exponent		, $
			xfora			= xfora 		, $
			double_precision= double_precision, $
			integer			= integer		, $
			strvec			= StrVECi		, $
			fmt 			= fmti			, $
			numfmt			= numfmti		, $
			lenfmt			= lenfmti		, $
			strcrumbs		= strcrumbsi	, $
			numcrumbs		= numcrumbsi	, $
			lencrumbs		= lencrumbsi	, $
			modify_format	= modify_format	, $
			is_number		= is_number 	, $
			is_integer		= is_integer	, $
			positive_only	= positive_only)

		boost, /push, FltVEC, FltVECi
		boost, /push, StrVEC, StrVECi
		boost, /push, fmt   , fmti
		boost, /push, numfmt, numfmti
		boost, /push, lenfmt, lenfmti
		boost, /push, strcrumbs, strcrumbsi
		boost, /push, numcrumbs, numcrumbsi
		boost, /push, lencrumbs, lencrumbsi

	ENDFOR

	RETURN, FltVEC

ENDIF

InitVar, positive_only		, /key
InitVar, is_number			, /key
InitVar, is_integer			, /key
InitVar, double_precision	, /key
InitVar, integer			, /key
InitVar, exponent			, /key
InitVar, xfora	 			, /key
InitVar, modify_format		, /key

VALID_NR    = ['1','2','3','4','5','6','7','8','9','0']
VALID_NRDOT = [VALID_NR,'.']
VALID_EXP   = [VALID_NR,'+','-']
CASE positive_only OF
0: VALID_FULL  = [VALID_NR,'.','+','-']
1: VALID_FULL  = [VALID_NR,'.','+']
ENDCASE

cStr = strtrim(INPUT[0])
Len  = strlen(cStr)
I = -1

InitVar, mask_string, ''
nmask = strlen(mask_string)

FltVec = BadValue( integer ? 0LL : double_precision ? 0.0d0 : 0.0 )
StrVEC = ''
fmt    = '*'
numfmt = 0
fmtcnt = 0
lenfmt = 0

numcrumbs = 0
lencrumbs = 0

LAST = I								; End of previous number
BLANKS = 0

dot_found = 0

FIRST_CHAR:

I += 1

IF I GE Len THEN goto, DONE

IF I LT nmask THEN BEGIN
	IF strmid(mask_string,I,1) EQ '1' THEN BEGIN
		BLANKS = 0
		goto, FIRST_CHAR
	ENDIF
ENDIF

CH = strmid(cStr,I,1)
IF CH EQ ' ' THEN BEGIN
	BLANKS += 1
	goto, FIRST_CHAR
ENDIF

A = where(CH EQ VALID_FULL,cNum)		; First char must be +,-,. or digit
IF cNum EQ 0 THEN BEGIN
	BLANKS = 0
	goto, FIRST_CHAR
ENDIF

FIRST = -1
IF CH EQ '+' OR CH EQ '-' THEN BEGIN

	J = I+1								; Position following +,-
	IF J GE Len THEN BEGIN
		I = J
		goto, DONE
	ENDIF

	IF J LT nmask THEN BEGIN
		IF strmid(mask_string,J,1) EQ '1' THEN BEGIN
			BLANKS = 0
			goto, FIRST_CHAR
		ENDIF
	ENDIF

	CH = strmid(cStr,J,1)
	A = where(CH EQ VALID_NRDOT,cNum)	; +,- must be followed by . or digit
	IF cNum EQ 0 THEN BEGIN
		BLANKS = 0
		goto, FIRST_CHAR
	ENDIF

	FIRST = I							; Position of +,- (start of number)
	I = J								; Position of . or digit

ENDIF

IF CH EQ '.' THEN BEGIN

	J = I+1								; Position following .
	IF J GE Len THEN BEGIN
		I = J
		goto, DONE
	ENDIF

	IF J LT nmask THEN BEGIN
		IF strmid(mask_string,J,1) EQ '1' THEN BEGIN
			BLANKS = 0
			goto, FIRST_CHAR
		ENDIF
	ENDIF

	CH = strmid(cStr,J,1)
	A = where(CH EQ VALID_NR,cNum)		; . must be followed by number
	IF cNum EQ 0 THEN BEGIN
		BLANKS = 0
		goto, FIRST_CHAR
	ENDIF

	dot_found = 1

	DOT = I								; Position of .
	fmtchar = 'F'
	IF FIRST EQ -1 THEN FIRST = I		; Start of number
	I = J								; Position of digit

ENDIF ELSE	$
	DOT = -1

IF FIRST EQ -1 THEN FIRST = I			; Start of number
FIRST -= BLANKS							; Include blanks preceding number in format
BLANKS = 0								; Clear blanks counter

IF FIRST GT LAST+1 THEN BEGIN
	CASE fmtcnt OF
	0:    fmtl = 0
	1:    fmt += ','+fmtsub
	ELSE: fmt += ','+strcompress(fmtcnt,/remove_all)+fmtsub
	ENDCASE

	lenfmt += fmtcnt*fmtl

	A = FIRST-LAST-1

	fmtcnt = 0
	CASE xfora OF
	0: fmt += ',A'+strcompress(A,/remove_all)
	1: fmt += ','+strcompress(A,/remove_all)+'X'
	ENDCASE

	numcrumbs += 1
	lencrumbs += A
	A = strmid(cStr,LAST+1,A)
	CASE numcrumbs OF
	1   : strcrumbs = A
	ELSE: strcrumbs = [strcrumbs,A]
	ENDCASE

ENDIF

; At this point the first digit is located (possibly preceded by +,+.,- or -.)
; i.e. a valid number has been located (as yet incomplete)
; FIRST = position where number starts
; DOT	= position of . (-1 if no . was found)
; I	= position of first digit

; Now start looking for the end of the number

LAST_CHAR:

I += 1
IF I GE Len   THEN goto, NUMBER
IF I LT nmask THEN IF strmid(mask_string,I,1) EQ '1' THEN goto, NUMBER

CH = strmid(cStr,I,1)
IF DOT EQ -1 AND CH EQ '.' THEN BEGIN	; . is acceptable if it's the first one
	dot_found = 1
	DOT = I
	fmtchar = 'F'
	goto, LAST_CHAR
ENDIF

A = where(CH EQ VALID_NR,cNum)			; only digits are acceptable
IF cNum GT 0 THEN goto, LAST_CHAR

; if the keyword 'exponent' is not set then the number is considered completed.
; if the keyword 'exponent' is set then continue only if the letter D or E is found
; If the number is complete then I is the position following the number

IF NOT exponent OR (CH NE 'D' AND CH NE 'E' AND CH NE 'd' AND CH Ne 'e') THEN goto, NUMBER

fmtchar = strupcase(CH)

; Analyze exponent

IES = I									; Position of D,E

I = IES+1
IF I GE Len   THEN goto, NUMBER
IF I LT nmask THEN IF strmid(mask_string,I,1) EQ '1' THEN goto, NUMBER

CH = strmid(cStr,I,1)
A = where(CH EQ VALID_EXP,cNum)			; D,E must be followed by +,- or number
IF cNum EQ 0 THEN goto, NUMBER

IE = I

IF CH EQ '+' OR CH EQ '-' THEN BEGIN	; If IE is position of +,-
	J = IE+1							; Position following +,-
	IF J GE Len   THEN goto, NUMBER
	IF J LT nmask THEN IF strmid(mask_string,J,1) EQ '1' THEN goto, NUMBER

	CH = strmid(cStr,J,1)
	A = where(CH EQ VALID_NR,cNum)		; +,- must be followed by number
	IF cNum EQ 0 THEN goto, NUMBER

	IE = J								; Position of digit
ENDIF

I = IE									; first digit in exponent located
										; (possibly preceded by + or -)
LAST_CHAR_EXP:

I += 1
IF I GE Len THEN goto, NUMBER
IF I LT nmask THEN IF strmid(mask_string,I,1) EQ '1' THEN goto, NUMBER

CH = strmid(cStr,I,1)
A = where(CH EQ VALID_NR,cNum)		; only numbers are acceptable
IF cNum GT 0 THEN goto, LAST_CHAR_EXP

NUMBER:

StrVAL = strmid(cStr,FIRST,I-FIRST)

CASE 1 OF
integer 		: FltVal = long64(StrVAL)
double_precision: FltVAL = double(StrVAL)
ELSE    		: FltVAL = float (StrVAL)
ENDCASE

IF StrVEC[0] EQ '' THEN BEGIN
	StrVEC = StrVAL
	FltVEC = FltVAL
ENDIF ELSE BEGIN
	StrVEC = [StrVEC,StrVAL]
	FltVEC = [FltVEC,FltVAL]
ENDELSE

numfmt += 1

IF DOT EQ I-1 THEN BEGIN	; . is last char in number
	I = DOT 				; . will be processed again
	DOT = -1				; Cancel dot
ENDIF						; (this replaces Fn.0 by In)

IF DOT EQ -1 THEN BEGIN 	; No dot in number
	fmtchar = 'I'			; Switch to I format
	DOT = I-1
ENDIF

fmtnew = fmtchar + strcompress(I-FIRST,/remove_all)
CASE fmtchar OF
'F':  fmtnew += '.' + strcompress(I-  DOT-1 > 0,/remove_all)
'D':  fmtnew += '.' + strcompress(IES-DOT-1 > 0,/remove_all)
'E':  fmtnew += '.' + strcompress(IES-DOT-1 > 0,/remove_all)
ELSE:;fmtnew  = fmtnew
ENDCASE

IF fmtcnt EQ 0 THEN BEGIN
	fmtsub = fmtnew
	fmtcnt = 1
	fmtl = I-FIRST

ENDIF ELSE IF fmtnew EQ fmtsub THEN	$
	fmtcnt += 1			$

ELSE BEGIN
	CASE fmtcnt OF
	1:	  fmt += ','+fmtsub
	ELSE: fmt += ','+strcompress(fmtcnt,/remove_all)+fmtsub
	ENDCASE

	lenfmt += fmtcnt*fmtl

	fmtsub = fmtnew
	fmtl = I-FIRST
	fmtcnt = 1

ENDELSE

I -= 1
LAST = I			; Pos of last character processed
goto, FIRST_CHAR

DONE:

CASE fmtcnt OF
0:    fmtl = 0
1:    fmt += ','+fmtsub
ELSE: fmt += ','+strcompress(fmtcnt,/remove_all)+fmtsub
ENDCASE

lenfmt += fmtcnt*fmtl

IF I GT LAST+1 THEN BEGIN

	A = I-LAST-1

	CASE xfora OF
	0: fmt += ',A'+strcompress(A,/remove_all)
	1: fmt += ','+strcompress(A,/remove_all)+'X'
	ENDCASE

	numcrumbs += 1
	lencrumbs += A
	A = strmid(cStr,LAST+1,A)
	CASE numcrumbs OF
	1   : strcrumbs = A
	ELSE: strcrumbs = [strcrumbs,A]
	ENDCASE

ENDIF

fmt = strmid(fmt,2)

IF modify_format THEN fmt = flt_format(fmt)

fmt = '('+fmt+')'

CASE 1 OF
is_number : RETURN,               (numcrumbs EQ 0)*finite(FltVec[0])*n_elements(FltVEC)
is_integer: RETURN, (1-dot_found)*(numcrumbs EQ 0)*finite(FltVec[0])*n_elements(FltVEC)
ELSE	  : RETURN, FltVEC
ENDCASE

END