;+
; NAME:
;   smei_buf_prep
; PURPOSE:
;   Decide whether a frame extracted from an L1A file needs to be
;   written to disk.
; CATEGORY:
;   camera/idl/buf
; CALLING SEQUENCE:
    FUNCTION smei_buf_prep, hdri, destination, $
        nic         = nic       , $
        fits        = fits      , $
        last_dir    = last_dir  , $
        filelist    = filelist  , $
        split_dir   = split_dir , $
        overwrite   = overwrite , $
        count       = count     , $
        last_time   = last_time , $
        noclones    = noclones  , $
        comment     = comment   , $
        gzip        = gzip      , $
        need_gzip   = need_gzip , $
        status      = status
; INPUTS:
;   hdri        array[1]; type: frame header structure
;                   frame header
;   destination scalar; type: string
;                   name of existing directory where to write the file
;
;                   If split_dir is NOT set then frames will be written
;                   directly to 'destination'
;                   If split_dir is SET then files are written into subdirectories
;                   destination/year_doy/c1, destination/year_doy/c2 or
;                   destination/year_doy/c3
;   count       array[4]; type: integer
;                   Counters to be updated.
; OPTIONAL INPUT PARAMETERS:
;   /nic        write nic file
;   /fits       write fits file
;
;   /split_dir  if writing out lots and lots of frames it is probably better
;               to distribute the frames over multiple directories.
;               If /split_dir is set then each day of data gets its own
;               directory, with one subdirectory per camera (see the
;               description of 'destination' above).
;   /overwrite  by default frames are not written to drive if a file for
;               the frame already exists, UNLESS the new frame was from
;               the 1st telemetry dump. Set this keyword to
;               unconditionally overwrite existing files.
;               (setting -overwrite speeds up processing by a factor 2).
;   /noclones   if set, a frame that matches one of the last three frames
;               (same tlm file, L1A file, camera and time, with a frame
;               number that is 3 higher) is treated as a clone and skipped.
;               Only used if /split_dir is set and /overwrite is NOT set.
;   /gzip       if set, smei_buf_gzip is called for the previous day
;               directory whenever the day directory changes.
;   need_gzip=need_gzip
;               passed on to smei_buf_gzip
;
;   last_dir=last_dir
;               scalar; type: string
;               day directory used for the previous frame (only used if
;               /split_dir is set). Initialized here if it does not exist
;               yet, and updated on return.
;   filelist=filelist
;               array; type: string
;               sorted list of frame names already present in the current
;               day directory (only used if /split_dir is set and
;               /overwrite is NOT set). Maintained across calls.
;
;   count=count array[4]; type: integer
;               The count array MUST EXIST on input for updates to
;               occur. One or more of the counters are incremented by 1.
;               count[0]: # frames written to disk as .nic or .fts
;                   file (updated in smei_frm_write)
;               count[1]: # frames skipped because file already exists
;                   (updated here)
;               count[2]: # frames with times earlier than the most
;                   recent frame (updated here) that are written to
;                   disk (these should be frames from the second dump;
;                   if the second dump contains the same frames as the
;                   first the number of these frames should be small)
;               count[3]: # clone frames (updated here)
;   last_time=last_time
;               array[1]; type: time structure
;               time of most recent frame processed
;   status=status   if set, then status files for each day are created in
;               $SSWDB_SMEI/cat/sts. This keyword is used only
;               in the SMEI pipeline.
; OUTPUTS:
;   filename    scalar; type: string
;               fully-qualified file name to be used for output file.
;               If /overwrite is NOT set and the file exists already
;               then the null-string is returned.
;   count       array[4]; type: integer
;               updated file counters
;   last_time   array[1]; type: time structure
;               updated frame time
;   comment=comment
;               scalar; type: string
;               null-string, or a short note about the decision made:
;               'CLONE' (clone frame skipped), '1' or '2' (existing frame
;               compared against a first or second dump), 'TLM' (tlm time
;               comparison failed), or a read-error/out-of-date-header message
;               for an existing file that will be overwritten.
; INCLUDE:
    @compile_opt.pro        ; On error, return to caller
; COMMON BLOCKS:
    common  save_three_clones, last_hdrs, last
    common  save_double_dump , overlap_on, overlap_l1a, first_dump
; CALLS:
;   InitVar, IsType, CheckDir, TimeUnit, TimeGet, smei_property
;   smei_filepath, smei_frm_read, TimeOp, TimeLimits, smei_filename, destroyvar
;   boost, smei_buf_gzip
; RESTRICTIONS:
;   If /split_dir is set then 'mkdir' is spawned to create the day
;   directory, status files are written by spawning echo/ssh, and files are
;   compressed through smei_buf_gzip. This works in Linux, but not in
;   Windows without some modifications.
; PROCEDURE:
; MODIFICATION HISTORY:
;   MAR-2003, Paul Hick (UCSD/CASS)
;   MAY-2003, Paul Hick (UCSD/CASS)
;       If /overwrite is NOT set and a frame exists already the telemetry times are
;       compared. If the new frame is earlier than the one already on disk,
;       then it is still overwritten.
;       This determination is made from the name of the original telemetry file
;       which is stored in the frame header.
;   JUL-2003, Paul Hick (UCSD/CASS)
;       Force overwrite if there is read error on an existing .nic file.
;   FEB-2004, Paul Hick (UCSD/CASS)
;       Added /noclones keyword
;       Added new code to deal with double dump frames. Should reduce disk I/O
;       considerably.
;   AUG-2004, Paul Hick (UCSD/CASS)
;       Introduced error handler bracketing calls to smei_property(hdr,/tlm_time).
;       Occasionally the tlm_file name is screwy enough to crash
;       smei_property. If this happens continue assuming that the new frame
;       is from a second dump.
;   MAR-2009, Paul Hick (UCSD/CASS; pphick@ucsd.edu)
;       Added keyword "status". This triggers writing of "status" files
;       to $SSWDB_SMEI/cat/sts each time the last frame for a given
;       day has been written. Used in pipeline.
;-

    InitVar, split_dir  , /key
    InitVar, nic        , /key
    InitVar, fits       , /key
    InitVar, overwrite  , /key
    InitVar, gzip       , /key
    InitVar, noclones   , /key
    InitVar, status     , /key

    ; Counters are only updated if the caller supplied a count array

    tally = IsType(count, /defined)

    thdri  = smei_property(hdri, /time  )
    camera = smei_property(hdri, /camera)
    mode   = smei_property(hdri, /mode  )

    umin = TimeUnit(/minute)
    usec = TimeUnit(/second)

    ; Construct file name and check whether it exists already
    ; (also check for presence of gzipped frame)

    filename = smei_filename(thdri, camera=camera, mode=mode, type=(['.nic','.fts'])[fits])
    comment  = ''

    IF split_dir THEN BEGIN

        InitVar, last_dir, ''
        this_dir = smei_filepath(thdri,source=destination)  ; Does NOT include c1,c2,c3 part

        ; No last_dir yet (happens at start of run)

        first_time = last_dir EQ ''

        IF first_time THEN BEGIN

            ; For the first frame set last_dir to the preceding day.
            ; If the directory for that day doesn't exist, step back
            ; one day at a time until a directory is found.

            one_day    = TimeSet(/diff,1,TimeUnit(/day))
            early_time = TimeSet(GetFileSpec(this_dir,part='name'))
            early_time = TimeOp(/subtract,early_time,one_day)
            last_dir   = smei_filepath(early_time,source=destination)

            IF NOT checkdir(last_dir,/silent) THEN BEGIN
                i = 0
                REPEAT BEGIN
                    i++
                    early_time = TimeOp(/subtract,early_time,one_day)
                    early_dir  = smei_filepath(early_time,source=destination)
                ENDREP UNTIL checkdir(early_dir,/silent) OR i GE 7
                IF checkdir(early_dir,/silent) THEN last_dir = early_dir
            ENDIF

            ;message, /info, 'previous directory was '+hide_env(last_dir)
            print, 'smei_buf_prep: previous directory was '+hide_env(last_dir)

        ENDIF

        IF this_dir NE last_dir THEN BEGIN      ; Changing to other day

            ; Zips all files in the directory where the last frame was written
            ; if the next one is going to a different subdir.
            ; This should compress the c1,c2 and c3 subdirectories.

            IF gzip THEN smei_buf_gzip, last_dir, need_gzip

            ; @@ Probably have to loop back here to earlier days, until a day is found with
            ; @@ a status file with non-zero frame count listed.

            ; Update frame counts in status file for last_dir.
            ; Updating the frame counts in the status file to a non-zero value will
            ; result in last_dir to be picked up by the data pipeline.
            ; We want to update only if all frames for the day have been processed,
            ; so update only if this_dir GT last_dir.

            ;update_status = status AND NOT first_time AND this_dir GT last_dir
            update_status = status AND this_dir GT last_dir
            release_to_pipeline = 0

            ;message, /info, 'now in '+hide_env(this_dir)+'; status update for '+hide_env(last_dir)+': '+(['no','yes'])[update_status]
            print, 'smei_buf_prep: now in '+hide_env(this_dir)+'; status update for '+hide_env(last_dir)+': '+(['no','yes'])[update_status]

            ; The upper loop limit is 0 if update_status is not set, so the
            ; loop body is skipped altogether in that case.

            FOR i=1,3*update_status DO BEGIN    ; Loop over cameras

                cam_prefix = 'c'+strcompress(i,/rem)
                sts_name   = cam_prefix+'sts_'+GetFileSpec(last_dir,part='name')+'.txt'
                sts_remote = filepath(root=getenv('SMEIDB'),subdir=['cat','sts'],sts_name)

                ; The status file should already exist at this point; it gets initialized
                ; when the subdirectories c1,c2,c3 are created (see below).
                ; If it doesn't exist, too bad (probably a data gap of > 1 day); skip the
                ; update step and move on.

                IF (file_search(sts_remote))[0] EQ '' THEN BEGIN
                    message, /info, hide_env(sts_remote)+' does not exist'
                    continue
                ENDIF

                ; Read errors should not happen (NFS problem?)

                IF txt_read(sts_remote,txt,/silent) NE 1 THEN $
                    message, 'error reading '+hide_env(sts_remote)

                ; Get frame counts (total and all modes) stored in last_dir/cam_prefix

                tmp = file_search(filepath(root=last_dir,subdir=cam_prefix,'*.fts.gz'),count=frmcnt)
                tmp = GetFileSpec(tmp,part='name',/strict)

                ; Get frame count already stored in status file.
                ; where() returns -1 if the entry is missing; indexing txt with -1
                ; would pick up the wrong line, so skip a malformed status file.

                j = (where(strpos(txt,'frames :') EQ 0))[0]
                IF j LT 0 THEN BEGIN
                    print, 'smei_buf_prep: '+hide_env(sts_remote)+' not updated; no frame count entry found'
                    continue
                ENDIF
                frmcnt_in_status_file = long( strmid(txt[j],strlen('frames :') ) )

                ; If frmcnt_in_status_file equal to frmcnt, then there is nothing to update
                ; (both could be zero, or non-zero), or release to the pipeline.

                IF frmcnt_in_status_file EQ frmcnt THEN BEGIN
                    print, 'smei_buf_prep: '+hide_env(sts_remote)+' not updated; frame count unchanged at '+strcompress(frmcnt,/rem)
                    continue
                ENDIF

                ; Update total frame count

                txt[j] = 'frames :'+string(frmcnt,format='(I10)')

                FOR m=0,2 DO BEGIN              ; Update frame counts for modes 0,1,2
                    mpre = strcompress(m,/rem)
                    j = (where(strpos(txt,'mode '+mpre+' :') EQ 0))[0]
                    ; Leave the file content alone if the entry for this mode is missing
                    IF j GE 0 THEN txt[j] = 'mode '+mpre+' :' + $
                        string(round(total(strpos(tmp,cam_prefix+'m'+mpre) EQ 0)),format='(I10)')
                ENDFOR

                ;message, /info, sts_remote+' updated; frame count was '+strcompress(frmcnt_in_status_file,/rem)+' now is '+strcompress(frmcnt,/rem)
                print, 'smei_buf_prep: '+hide_env(sts_remote)+' updated; frame count was '+strcompress(frmcnt_in_status_file,/rem)+' now is '+strcompress(frmcnt,/rem)

                txt = strjoin(txt,string(10B))  ; 10B is new line
                spawn, 'echo "'+txt+'" | ssh soft@smei "cat - > '+sts_remote+'"'

                ; A day is released only if at least one camera went from zero
                ; frames to a non-zero frame count.

                IF frmcnt_in_status_file EQ 0 THEN release_to_pipeline = 1
                print, 'smei_buf_prep: '+hide_env(sts_remote)+' release to pipeline: '+(['no','yes'])[release_to_pipeline]

            ENDFOR

            IF release_to_pipeline THEN BEGIN
                ;message, /info, hide_env(last_dir)+' released to pipeline'
                print, 'smei_buf_prep: '+hide_env(last_dir)+' released to pipeline'
                release_file = filepath(root=getenv('SMEIDB'),subdir=['cat','sts','block'],'release_to_pipeline')
                spawn, 'echo "'+GetFileSpec(last_dir,part='name')+'" | ssh soft@smei "cat - >> '+release_file+'"'
            ENDIF

            CASE CheckDir(this_dir) OF

            0: BEGIN

                ; Create directory for day.
                ; The camera subdirectories are created by smei_frm_write.

                spawn, 'mkdir '+this_dir
                IF not CheckDir(this_dir) THEN  $
                    message, 'error creating '+hide_env(this_dir)
                message, /info, hide_env(this_dir)+' created'

                ; New directory, so there are no existing frames to keep track of

                IF NOT overwrite THEN destroyvar, filelist

                ; Initialize a status file for each camera (only if /status is set)

                FOR i=1,3*status DO BEGIN
                    cam_prefix = 'c'+strcompress(i,/rem)
                    sts_name   = cam_prefix+'sts_'+GetFileSpec(this_dir,part='name')+'.txt'
                    sts_remote = filepath(root=smei_filepath(mode='sts'),sts_name)

                    txt = [ 'camera : '+strcompress(i,/rem) , $
                            'frames : 0'                    , $
                            'mode 0 : 0'                    , $
                            'mode 1 : 0'                    , $
                            'mode 2 : 0'                    , $
                            'cal : 0'                       , $
                            'base : 0'                      , $
                            'summary : 0'                   , $
                            'orb : 0'                       , $
                            'sky : 0'                       , $
                            'hdr : 0'                       , $
                            'msk : 0'                       ]

                    message, /info, hide_env(sts_remote)+' created'
                    txt = strjoin(txt,string(10B))  ; 10B = new line
                    spawn, 'echo "'+txt+'" | ssh soft@smei "cat - > '+sts_remote+'"'
                ENDFOR

            END

            1: BEGIN

                message, /info, hide_env(this_dir)+' exists'

                ; Collect a warning for each camera whose status file indicates that
                ; the 'base' step has been done already (only if /status is set)

                FOR i=1,3*status DO BEGIN
                    cam_prefix = 'c'+strcompress(i,/rem)
                    sts_name   = cam_prefix+'sts_'+GetFileSpec(this_dir,part='name')+'.txt'
                    sts_remote = filepath(root=smei_filepath(mode='sts'),sts_name)

                    IF txt_read(sts_remote,txt,/silent) THEN BEGIN
                        cstr = 'base :'
                        ; Use the first matching line only; a missing entry (j = -1)
                        ; is treated as 'base not done'.
                        j = (where( strpos(txt,cstr) EQ 0 ))[0]
                        IF j GE 0 THEN base_done = fix( strmid( txt[j], strlen(cstr) ) ) ELSE base_done = 0
                        IF base_done THEN BEGIN
                            cstr = hide_env(sts_remote)+' has run through pipeline already'
                            message, /info, cstr
                            boost, sts_message, '\'+cstr
                        ENDIF
                    ENDIF
                ENDFOR

                IF IsType(sts_message,/defined) THEN BEGIN
                    sts_message = strjoin(sts_message,string(10B))
                    spawn, 'echo "'+sts_message+'" | mail -s"message from SMEI pipeline" pphick@ucsd.edu jclover@ucsd.edu'
                ENDIF

                ; The first time we go into an existing directory, we pick up a list
                ; of frames already there. Make sure to sort the list.

                IF NOT overwrite THEN BEGIN

                    cd, current=current         ; Save current directory

                    destroyvar, filelist

                    FOR i=1,3 DO BEGIN
                        cam_dir = smei_filepath(thdri, camera=i, source=destination)
                        IF CheckDir(cam_dir, /stay) THEN BEGIN
                            tmp = file_search() ; Only file names; no directories
                            IF tmp[0] ne '' THEN BEGIN
                                tmp = strmid(tmp,0,strlen(filename))    ; Strip .gz
                                tmp = tmp[sort(tmp)]
                                tmp = tmp[uniq(tmp)]    ; In case both .nic and .nic.gz is present
                                boost, filelist, tmp
                            ENDIF
                        ENDIF
                    ENDFOR

                    cd, current                 ; Restore original directory

                ENDIF

            END

            ENDCASE     ; CheckDir(this_dir)

        ENDIF   ; this_dir ne last_dir

        last_dir = this_dir

    ENDIF   ; split_dir

    IF NOT overwrite THEN BEGIN

        IF split_dir THEN BEGIN

            ; ==== HOORAH, IT'S FUDGE TIME AGAIN !!!!!!!!! =====

            IF noclones THEN BEGIN

                ; L1A files originating from Fairbanks occasionally duplicate frames.
                ; This is not the same as the 'double dump' copy.
                ; Duplicate frames always seem to be grouped together: 6 frames with
                ; identical times for cameras 2,3,1 and again 2,3,1.
                ; We put the check here before the file gets read (preventing the
                ; read is what makes this worth doing)

                ; We save information for the last three frames and try to intercept the
                ; duplicates here. The check for a clone frame is probably overkill, but
                ; since I don't really know what is going on we'll keep it that way for now:
                ;   Same tlm_file
                ;   Same l1a_file
                ;   Frame number in l1a file must be 3 different
                ;   Same camera
                ;   Same time

                CASE n_elements(last_hdrs) OF

                3: BEGIN

                    ; last_hdrs is used as a circular buffer of three headers;
                    ; 'last' points to the slot holding the header from three frames ago.

                    InitVar, last, -1
                    last = (last+1) mod 3

                    is_clone =  $
                        hdri.tlm_file EQ last_hdrs[last].tlm_file   AND $
                        hdri.l1a_file EQ last_hdrs[last].l1a_file   AND $
                        hdri.frame_nr EQ last_hdrs[last].frame_nr+3 AND $
                        hdri.camera   EQ last_hdrs[last].camera     AND $
                        TimeOp(/subtract,hdri.time,last_hdrs[last].time,TimeUnit(/sec)) EQ 0

                    IF is_clone THEN BEGIN
                        filename = ''
                        ; Only touch the counters if the caller supplied them
                        ; (same guard as used for count[1] and count[2]).
                        IF tally THEN ++count[3]
                        comment  = 'CLONE'
                    ENDIF

                    last_hdrs[last] = hdri

                END

                0   : last_hdrs = hdri
                ELSE: last_hdrs = [last_hdrs,hdri]

                ENDCASE

            ENDIF   ; noclones

            ; ==== END FUDGE =====

            ; At this point filename will be a valid name for a SMEI frame, unless it was
            ; identified as a clone.

            IF filename NE '' THEN BEGIN

                ; The double-dump means that essentially every frame comes along twice.
                ; We want to keep the first dump. Each frame header contains both the L1A and the
                ; tlm file of origin. Each L1A file comes from a specific tlm file, but in
                ; principle each tlm file could result in more than one L1A file.

                ; The only perfect way to find the first dump is to compare the tlm file names
                ; when duplicate frames are found, and retain the frame for the earliest tlm file.
                ; This slows down processing a lot because we have to start unzipping and reading
                ; frames, so we use a shortcut.

                ; We maintain an overlap_on switch.

                ; If a frame is not present on disk yet (i.e. is not on 'filelist') then
                ; overlap_on is switched OFF. As long as overlap_on is OFF the values of
                ; overlap_l1a and first_dump don't matter.

                ; If a frame is found that already exist on disk (i.e. already is in 'filelist')
                ; then overlap_on is set ON if two conditions are met:
                ; - overlap_on was ON already on (overlap_on is initialized to OFF)
                ; - and the frame is from L1A file 'overlap_l1a' (is initialized to null string)
                ; For the first existing frame overlap_on will always be set to OFF.

                ; If for an existing frame overlap_on is switched OFF then the existing frame
                ; is read from disk, and if successful then:
                ; - overlap_on is switched ON again.
                ; - overlap_l1a is set to the L1A file of the current frame
                ; - first_dump is set to ON if the current frame is a first dump
                ;   (as determined by comparing tlm file names).
                ; The values of overlap_l1a and first_dump are meaningful only if
                ; overlap_on is ON.

                ; If for an existing frame overlap_on stays ON (i.e. the previous frame also
                ; was an existing frame from the same L1A file), then overlap_l1a and first_dump
                ; remain the same. Note that this is where we save time since we don't read the
                ; existing frame from disk anymore, but instead just treat the current frame
                ; the same as the previous one.

                CASE 1 OF

                IsType(filelist, /undefined): BEGIN

                    ; First frame in directory: start file list

                    filelist   = filename
                    overlap_on = 0

                END

                ; Add new frame at end of file list

                filename GT filelist[n_elements(filelist)-1]: BEGIN

                    filelist   = [filelist, filename]
                    overlap_on = 0

                END

                filename lt filelist[0]: BEGIN

                    ; Add new frame at beginning of file list

                    filelist   = [filename, filelist]
                    overlap_on = 0

                END

                ELSE: BEGIN

                    ; Since we already tested for filename lt filelist[0] and filelist is sorted
                    ; we will never get tmp = -1 here.

                    tmp = (where(filename LE filelist))[0]

                    CASE filename OF

                    filelist[tmp]: BEGIN

                        ; The frame already is in file list

                        InitVar, overlap_on , 0
                        InitVar, overlap_l1a, ''

                        overlap_on = overlap_on AND smei_property(hdri, /l1a_file) EQ overlap_l1a

                        CASE overlap_on OF

                        0: begin

                            ; To decide whether to overwrite or not we need the header from disk.
                            ; Try the gzipped file first, then the uncompressed file.

                            fullname = filepath(root=smei_filepath(thdri, camera=camera, source=destination), filename)
                            tmp = smei_frm_read( fullname+'.gz', hdr=old, silent=2, error=comment, /nodata)
                            IF comment ne '' THEN tmp = smei_frm_read( fullname, hdr=old, silent=2, error=comment, /nodata )

                            ; Check for presence of tlm file name. If old is NOT a structure at
                            ; this point then probably a keyword has been added to the Fits header
                            ; since the file was written, so we better overwrite it.

                            CASE IsType(old,/structure) OF
                            0: comment = ' Out-of-date header? Overwriting!'
                            1: IF comment EQ '' THEN IF strlen(old.tlm_file) EQ 0 THEN comment = 'no_tlm_file'
                            ENDCASE

                            ; If there is a read error then probably the file is corrupt,
                            ; so overwrite it. If there is no read error, overwrite only if
                            ; the new file is a first dump.

                            overlap_on = comment EQ ''

                            IF overlap_on THEN BEGIN        ; Successful read

                                overlap_l1a = smei_property(hdri, /l1a_file)

                                old_tlm = smei_property(old , /tlm_file)
                                new_tlm = smei_property(hdri, /tlm_file)

                                ; Compare names of tlm files. If the tlm file name for the current frame
                                ; is earlier than the tlm file of the frame on disk then the new frame
                                ; is from the first dump, and we want to overwrite the copy on disk.

                                ; The error handler catches failures in smei_property(/tlm_time);
                                ; execution then resumes at the IF below with error NE 0.

                                catch, error

                                IF error EQ 0 THEN BEGIN

                                    old = smei_property(old , /tlm_time)
                                    new = smei_property(hdri, /tlm_time)

                                    first_dump = TimeOp(/subtract, new, old, umin) LT 0

                                    CASE first_dump OF
                                    0: message, /info, 'skip 2nd dump ('+TimeGet(new,/ymd,upto=umin)+')'
                                    1: message, /info, 'new '+filename+' ('+strjoin(TimeGet([new,old],/ymd,upto=umin),' < ')+')'
                                    ENDCASE

                                    filename = (['',filename])[first_dump]
                                    comment  = (['2','1'])[first_dump]

                                ENDIF ELSE BEGIN

                                    message, /info, 'Problem differentiating 1st and 2nd dump. Assuming 2nd dump'
                                    message, /info, 'Old TLM: '+old_tlm
                                    message, /info, 'New TLM: '+new_tlm

                                    first_dump = 0
                                    filename = ''
                                    comment  = 'TLM'

                                ENDELSE

                                catch, /cancel

                            ENDIF

                        END

                        1: BEGIN

                            ; Treat the same as previous frame.

                            filename = (['',filename])[first_dump]
                            comment  = (['2','1'])[first_dump]

                        END

                        ENDCASE

                    END

                    ELSE: BEGIN

                        ; Insert new frame in file list

                        CASE tmp EQ 0 OF
                        0: filelist = [filelist[0:tmp-1], filename, filelist[tmp:*]]
                        1: filelist = [filename, filelist]
                        ENDCASE

                        overlap_on = 0

                    END

                    ENDCASE     ; filename

                END

                ENDCASE     ; 1

            ENDIF   ; filename ne ''

        ENDIF   ; split_dir

        ; Frame will be written: count it if it is not later than the most recent
        ; frame, and move last_time forward. Nothing happens if last_time does
        ; not exist on input.

        IF filename NE '' THEN BEGIN
            IF IsType(last_time, /defined) THEN BEGIN
                IF TimeOp(/subtract, thdri, last_time, usec) LE 0 THEN IF tally THEN ++count[2]
                last_time = TimeLimits([last_time, thdri], /max)
            ENDIF
        ENDIF

    ENDIF   ; not overwrite

    ; Return the fully-qualified name if the frame needs to be written;
    ; otherwise count the frame as skipped and return the null-string.

    CASE filename eq '' OF

    0: BEGIN
        case split_dir of
        0: filename = filepath(root=destination, filename)
        1: filename = filepath(root=smei_filepath(thdri, camera=camera, source=destination), filename)
        ENDCASE
    END

    1: IF tally THEN ++count[1]

    ENDCASE

    RETURN, filename  &  END