#!/usr/bin/python

#+
# NAME:
#	www_help
# PURPOSE:
#	Python cgi script used to retrieve code from original
#	source code files in on-line www_help system
# INPUTS:
#	The www_help systems provide 3 arguments to this cgi script:
#	- A file name in the form $TOP/sub1/.../file_name.code
#	- A procedure name (the entry following the NAME section in the
#	  source code header)
#	- Number of spaces per tab
# RESTRICTIONS:
#	Requires a valid file www_help.lst containing definitions of
#	top directories in the form
#		$TOP=full_directory name
# SEE ALSO:
#	www_help
# PROCEDURE:
# >	See the IDL www_help package for more information.
#
# >	The start of the file name ($HAT in the example) refers to a
#	top directory defined in the file www_help.lst.
# >	www_help.lst should contain at least one entry
#		$HAT=dir_where_headings
#	specifying the directory where the files with the section
#	headings are stored (in SSW this should be
#		~/ssw/smei/gen/idl/gen).
#
# >	Requires a web server with python installed.
#	The script has only been run on a Linux box so far.
#
# >	Problems could arise when lines read from a source code file
#	contain special html characters (like <,>,&).
#	This is avoided by using the cgi.escape function. This is done
#	immediately after reading a new line of code, i.e. prior to
#	any further processing (this processing involves adding html
#	tags; these would get lost if the processed line of code were
#	run through cgi.escape).
#
#	There is a problem with IDL here: special html characters
#	are replaced by character combinations like &gt;
#	i.e. an ampersand at the start and a semicolon at the end.
#	Care must be taken that the semicolon is not interpreted as
#	the start of an IDL comment. This is done by looking for
#	a preceding ampersand with at least 1 and at most 3 intermediate
#	characters.
# MODIFICATION HISTORY:
#	DEC-2001, Paul Hick (UCSD/CASS), v0.0
#	JAN-2002, Paul Hick (UCSD/CASS)
#		Introduced www_help.lst to store definitions for
#		environment variables.
#	JUL-2002, Paul Hick (UCSD/CASS)
#		Added option to omit lines from being displayed.
#	JAN-2004, Paul Hick (UCSD/CASS), v1.0
#		Added checks for parameters and include statements for
#		Fortran code.
#	JUL-2005, Paul Hick (UCSD/CASS), v2.0
#		Added 'auto_calls' section. IDL is now used to find all
#		routines called by IDL and Fortran code by calling
#		www_help_get_info.pro.
#	JUL-2005, Paul Hick (UCSD/CASS), v2.1
#		Removed dependence on string module.
#	JUL-2006, Paul Hick (UCSD/CASS)
#		Fixed bug in detecting function declaration lines.
#	DEC-2006, Paul Hick (UCSD/CASS; pphick@ucsd.edu)
#		The call to the IDL procedure www_help_get_info suddenly
#		returns a leading new line (since IDL version 6.3???)
#		leading to an empty 'auto_calls' list.
#-

import sys, os, cgi

# NOTE: at module level a 'global' statement has no effect. These lines
# only list module-level names that the functions below rely on.
# (plast is shared in the same way but is not listed here.)
global code_color, text_color, mark_color, title_color, param_color, incl_color, end_color
global comment, tabsize, esc
global show_code

# ==========
# Reads list of top directories
# Entries in tops have the form $NAME=full_directory

def read_toplist(file):
	"""Replace the leading $NAME of 'file' by its directory from www_help.lst.

	www_help.lst is read from the current working directory. It holds
	whitespace-separated entries of the form $NAME=full_directory.

	file	name starting with one of the $NAME entries, e.g. '$HAT'
			or '$SMEI/sub/file.f'

	Returns the name with the matching $NAME replaced by its directory.
	If no entry matches, a message is printed and '' is returned.
	When several entries match, the longest $NAME wins (so $SSW does
	not capture names starting with $SSW_SMEI). Entries without '=' or
	with an empty name are ignored.
	"""
	iu = open('www_help.lst', 'r')
	try:							# Make sure the file is closed again
		tops = iu.read().split()
	finally:
		iu.close()

	best = None
	for top in tops:
		name = top.split('=',1)		# The directory itself may contain '='
		if len(name) != 2 or name[0] == '':
			continue				# Malformed entry
		if file.startswith(name[0]) and (best is None or len(name[0]) > len(best[0])):
			best = name

	if best is None:
		# A single parenthesized argument prints the same text under the
		# Python 2 print statement and the Python 3 print function.
		print('Do not know where  '+file.split('/')[0]+'  is')
		return ''

	return best[1]+file[len(best[0]):]

# ==========
# Reads next line from source code file

def read_next_line(iu):
	"""Read the next line from an open source code file, ready for html output.

	iu		open file object

	Returns the line, including its trailing newline, with a DOS
	carriage return removed, tabs expanded using the module-level
	'tabsize' and the special html characters <, > and & escaped.
	Returns '' at end of file.
	"""
	line = iu.readline()

	# Strip the carriage return from DOS records. Only a CR directly in
	# front of the newline is removed; any other CR is left alone.
	if line.endswith('\r\n'):
		line = line[:-2]+'\n'

	line = line.expandtabs(tabsize)

	# Escape before any html tags are added to the line. The module-level
	# 'esc' flag is not consulted: the line is always escaped.
	return cgi.escape(line)

# ==========

def print_line(first_line,prefix,line,name):

	global show_code

	if line[0] != comment and first_line:
		p = line.find(name)
		if p != -1:
			ok = p == 0					# Name at beginning of line ?
			if not ok:					# If not, check that preceding char is not alphanumeric
				ok = not is_alnum(line[p-1])

			if ok:
				ok = p+len(name) == plast# Name at end of line ?
				if not ok:				# If not, check that following char is not alphanumeric
					ok = not is_alnum(line[p+len(name)])

			if ok:						# Bracket with color changes
				line = line[:p]+title_color+name+end_color+code_color+line[p+len(name):]

		print code_color+line[:-1]

	else:
		if line.find('@'+'CLASSIFY') != -1:
			show_code = False 
		elif line.find('@'+'UNCLASSIFY') != -1:
			show_code = True
			print prefix+'-LINES OMITTED-'
		elif show_code:
			print prefix+line[:-1]

	return 0

# ==========
# Check for start or end of header

def start_header(line):
	"""Return True if 'line' begins with the comment character followed by '+'."""
	return line.startswith(comment+'+')

def end_header(line):
	"""Return True if 'line' begins with the comment character followed by '-'."""
	return line.startswith(comment+'-')

def is_alnum(single_char):
	"""Return True if 'single_char' is alphanumeric or an underscore."""
	if single_char == '_':
		return True
	return single_char.isalnum()
 
def is_alpha(single_char):
	"""Return True if 'single_char' is alphabetic or an underscore."""
	if single_char == '_':
		return True
	return single_char.isalpha()
 
# ==========
# Read file 'sections' containing list of valid header keys

def get_section_list(sections):
	list = []

	islist = os.path.isfile(sections)

	if islist:
		iu   = open(sections, 'r')
		line = read_next_line(iu)

		while len(line):
			if start_header(line):
				break
			line = read_next_line(iu)
		else:
			print 'start of header not found: ',sections
			islist = 0

		if islist:
			line = read_next_line(iu)

			while len(line):
				if end_header(line):
					break

				if line[len(line)-2] == ':':
					list.append(line)

				line = read_next_line(iu)
			else:
				print 'end of header not found: ',sections
				islist = 0

		iu.close()

	return list

#===========

def fortran_syntax(line):
	"""Add html color tags to a line of Fortran code.

	line	line of code; only the part up to position 'plast'
			(module-level) is searched

	Two things are highlighted:
	- identifiers containing '__' with a letter or digit on both sides,
	  unless directly preceded or followed by a single quote
	  (param_color)
	- the text between the first and the last single quote following
	  the word 'include' (incl_color)

	Returns the line with the tags inserted. The module-level 'plast'
	is increased by the length of the inserted tags.
	"""
	global plast

	# Look for parameters containing __ in Fortran code

	p = line.rfind('__',0,plast+1)
	while p != -1:						# __ found
		p0 = p+len('__')				# Pos char after __
										# __ must be preceded and followed by letter or digit
		ok = p != 0 and line[p-1].isalnum() and p0 < plast and line[p0].isalnum()
		if ok:
			p = p-1						# Skip back to start of identifier
			while p > 0 and is_alnum(line[p-1]):
				p = p-1

			p0 = p0+1					# Skip forward to end of identifier
			while p0 < plast and is_alnum(line[p0]):
				p0 = p0+1

			ok = (p == 0  or line[p-1] != "'") and line[p0] != "'"

			if ok:
				line = line[0:p]+end_color+param_color+line[p:p0]+end_color+code_color+line[p0:]
				plast = plast+len(end_color)+len(param_color)+len(end_color)+len(code_color)

		p = line.rfind('__',0,p)		# Look for another __ earlier in the line

	# Look for include statements

	p = line.find('include',0,plast+1)
	if p != -1:
		q0 = line.find("'",p+len('include'),plast+1)	# Opening quote
		if q0 != -1:
			# The closing quote is searched for directly behind the
			# opening quote, so short file names are found too.
			q1 = line.rfind("'",q0+1,plast+1)
			if q1 != -1:
				# Only color when both quotes are present; otherwise the
				# line is left alone.
				line = line[0:q0+1]+incl_color+line[q0+1:q1]+end_color+code_color+line[q1:]
				plast = plast+len(incl_color)+len(end_color)+len(code_color)

	return line

#===========

def idl_syntax(line):
	"""Add html color tags to a line of IDL code.

	line	line of code; only the part up to position 'plast'
			(module-level) is searched

	Three things are highlighted:
	- system variables: '!' followed by a letter or underscore and any
	  further alphanumeric characters, unless directly preceded or
	  followed by a single quote (param_color)
	- the alphanumeric name following the first '@' (incl_color)
	- the text following the first '{' up to the next '}', space or
	  comma (incl_color)

	Returns the line with the tags inserted. The module-level 'plast'
	is increased by the length of the inserted tags.
	"""
	global plast

	# System variables, working from right to left

	bang = line.rfind('!',0,plast+1)
	while bang != -1:
		stop = bang+1								# Char after !
		if stop < plast and is_alpha(line[stop]):	# Must be a letter
			stop += 1
			while stop < plast and is_alnum(line[stop]):
				stop += 1

			quoted = (bang != 0 and line[bang-1] == "'") or line[stop] == "'"

			if not quoted:
				line = line[:bang]+end_color+param_color+line[bang:stop]+end_color+code_color+line[stop:]
				plast += len(end_color)+len(param_color)+len(end_color)+len(code_color)

		bang = line.rfind('!',0,bang)				# Next ! to the left

	# Include statement: color the name following the @

	at = line.find('@',0,plast+1)
	if at != -1:
		stop = at+1
		while stop < plast and is_alnum(line[stop]):
			stop += 1

		line = line[:at+1]+incl_color+line[at+1:stop]+end_color+code_color+line[stop:]
		plast += len(incl_color)+len(end_color)+len(code_color)

	# Structure definition: color the text following the {

	brace = line.find('{',0,plast+1)
	if brace != -1:
		stop = brace+1
		while stop < plast and line[stop] not in '} ,':
			stop += 1

		line = line[:brace+1]+incl_color+line[brace+1:stop]+end_color+code_color+line[stop:]
		plast += len(incl_color)+len(end_color)+len(code_color)

	return line

# ==========

# Python error messages are written to the same stream as the html page
sys.stderr = sys.stdout

# HTTP header. The "\n" in the string plus the newline added by print
# produce the empty line that follows the header.
print "Content-type: text/html\n"


# Font tags used to color the various parts of the source code
code_color = '<font color="blue">'		# Code
text_color = '<font color="red">'		# Comments
mark_color = '<font color="black">'		# Names of called routines
title_color= '<font color="purple">'	# Routine name on its declaration line
param_color= '<font color="green">'		# Fortran parameters, IDL system variables
incl_color = '<font color="magenta">'	# Include files, IDL structures
end_color  = '</font>'					# Closes any of the above
gap  = 3								# Used in the test that tells the ';' of an html
										# entity such as &gt; from an IDL comment
show_code = False						# Flag updated by print_line

form = cgi.FieldStorage()

# Pick up the fully qualified file name containing the source code.
# getfirst() returns None for a missing field instead of raising an
# exception, and copes with a field that is supplied more than once.

file = form.getfirst('file')

if file is not None:
	name = form.getfirst('name')
	if name is None:
		# Without a procedure name there is nothing to look for
		print('No procedure name specified for '+cgi.escape(file))
		sys.exit()

	try:
		tabsize = int( form.getfirst('tabsize', '8') )
	except ValueError:
		tabsize = 8				# Same default as used at the command prompt

else:

	# The following is primarily for testing from the command prompt

	n = len(sys.argv)
	if n > 1:
		file = sys.argv[1]
	else:
		file = '$SMEI/user/phick/for/main/sd.f'

	if n > 2:
		name = sys.argv[2]
	else:
		name = 'SD'

	if n > 3:
		tabsize = int( sys.argv[3] )
	else:
		tabsize = 8

low_name = name.lower()			# Used for case-insensitive searches

# Check whether we are in the SSW tree
# Also set up the file name for the section headings

ssw_tree = file.find('$SSW') == 0
headings = read_toplist('$HAT')
if headings == "":
	sys.exit()

# Replace the environment variable at the start of the file name
# by the fully qualified directory name.

if file[0] == '$':
	file = read_toplist(file)
	if file == '':
		sys.exit()

# Check whether file exists

if not os.path.exists(file):
	print 'file does not exist, ',file
	sys.exit()


# Split off the file type; it selects the comment characters, the
# keywords that start a routine and the file with the section headings.

parts    = os.path.split(file)
def_name = os.path.splitext(parts[1])
file_name, extension = def_name

smei_path   = read_toplist('$SMEI')
idl_startup = os.path.join(smei_path,'pro','idl_startup.pro')

# Settings that are the same for most file types.
# The IDL settings stay empty unless they are filled in below.
# Note that tabsize is set here for every file type.

idl_dir = idl_exec = idl_path = ''
esc     = 1
tabsize = 8
program = []

if extension == '.pro':					# IDL
	comment = comment2 = ';'
	program = ['function', 'pro']
	tabsize = 4

	# Name of idl executable

	idl_dir  = read_toplist('$IDL')
	idl_exec = os.path.join(idl_dir,'bin','idl')

	if ssw_tree:
		sections_name, idl_top = 'sections_ssw.txt', '$SSW'
	else:
		sections_name, idl_top = 'sections_pro.txt', '$SMEI'

	sections = os.path.join(headings,sections_name)
	idl_path = read_toplist(idl_top)

	if idl_path:
		idl_path = '+'+idl_path

elif extension in ('.f', '.h'):			# Fortran source or include file
	comment, comment2 = 'C', '!'
	sections = os.path.join(headings,'sections_for.txt')

	if extension == '.f':
		program  = ['function', 'subroutine', 'entry']
		idl_dir  = read_toplist('$IDL')
		idl_exec = os.path.join(idl_dir,'bin','idl')

else:									# Anything else is treated as a script
	comment = comment2 = '#'
	sections = os.path.join(headings,'sections_script.txt')

low_comment, low_comment2 = comment.lower(), comment2.lower()

list   = get_section_list(sections)
islist = bool(list)

# Find the module 'name'.
# The name can be picked up in two ways:
# - a line preceding the header, e.g.   subroutine WR2DARR
# - inside the header after the key NAME:
# The loop stops (found is true) with 'line' holding the line on which
# the name was found; f_in is left open for the output loop that follows.

found     = 0			# Set when the module has been located
in_header = 0			# Inside a header
in_name   = 0			# Inside the name section of a header

if ssw_tree:
	name_keys = [' NAME',' FILE',' ROUTINE','PROGRAM','FUNCTION','TITLE']
else:
	name_keys = [' NAME']

# Turn the keys into the form in which they appear at the start of a
# line: comment character, key, colon.

for n in range(len(name_keys)):
	name_keys[n] = comment+name_keys[n]+':'

f_in = open(file, 'r')			# Open source code file

line = read_next_line(f_in)

ask_cnt = 0

while len(line) > 0:

	read_again = 1		# Read a new line at the end of this pass
	get_name = 0		# Set if the current line contains the name

	if start_header(line):		# Entering header
		in_header = 1
		in_name   = 0

	elif end_header(line):		# Exiting header
		in_header = 0

	elif in_header:
		if not in_name:
			for key in name_keys:
				if line.find(key) == 0:
					in_name = 1
					if len(line) > len(key)+1:

						# Text follows the key on the same line. Rewrite the
						# line as 'comment-char text' and process it again
						# without reading a new line.

						line = comment+' '+line[len(key):].strip()
						read_again = 0
					break

		elif list.count(line):	# Next header key: end of name section
			in_name = 0

		else:
			pos = line.lower().find(low_name)  # Case-insensitive search
			get_name = pos != -1

	else:
		pos = line.lower().find(low_name)	# Case-insensitive search
		get_name = pos != -1


	if get_name:

		# The line contains the name somewhere. Accept it only if the
		# second word on the line starts with the name, the name is not
		# followed by another alphanumeric character and the first word
		# is one of the 'program' keywords. If none of the keywords
		# matches (or there are none) the comment character is accepted
		# as first word.

		parts = line.lower().split()

		if len(parts) > 1:
			part0 = parts[0].strip()
			part1 = parts[1].strip()

			if part1.find(low_name) == 0:   # part1 starts with name
				ok = len(name) == len(part1)
				if not ok:
					ok = not is_alnum(part1[len(name)])

				if ok:
					for p in program:
						found = parts[0] == p
						if found:
							break
					else:
						found = parts[0] == low_comment

	if found:
		break

	if read_again:
		line = read_next_line(f_in)

if not found:
	print 'Could not find procedure "'+name+'" in '+file
	sys.exit()

# If an IDL executable is known, IDL is asked for the list of routines
# called by 'name' (IDL function www_help_get_info).

auto_calls = idl_exec != ''

if auto_calls:

	# subprocess (Python 2.4 and later) is only needed here
	import subprocess

	def idl_string(text):
		# IDL string constant in single quotes; a single quote inside
		# the string has to be written twice.
		return "'"+text.replace("'","''")+"'"

	# IDL statements: add the www_help routines to the IDL path, then
	# print the list of called routines.

	idl_args = idl_string(name)+','+idl_string(file)
	if idl_path != '':
		idl_args = idl_args+',expand_path('+idl_string(idl_path)+')'

	cmd = [ 'compile_opt strictarr',
			"!path=!path+':'+expand_path("+idl_string('+'+headings)+')',
			'print, www_help_get_info('+idl_args+')' ]

	cmd = ' & '.join(cmd)

	# /usr/local/bin is needed to locate ftnchek
	# $SMEI is needed by idl_startup.pro
	# idl_startup.pro is needed to define env vars
	# IDL_DEV is set to speed up starting IDL

	idl_env = os.environ.copy()
	idl_env['PATH']        = idl_env.get('PATH','')+':/usr/local/bin'
	idl_env['SMEI']        = smei_path
	idl_env['IDL_DIR']     = idl_dir
	idl_env['IDL_DEV']     = 'Z,none'
	idl_env['IDL_STARTUP'] = idl_startup

	# IDL is started directly, without a shell, and reads the statements
	# from its standard input. This way nothing taken from the request
	# (name, file) is ever interpreted by a shell.

	try:
		idl = subprocess.Popen([idl_exec,'-quiet'],
				stdin=subprocess.PIPE, stdout=subprocess.PIPE, env=idl_env)
		m = idl.communicate(cmd+'\n')[0].split('\n')
	except (OSError, IOError):
		m = []					# IDL could not be run: no called routines

	# Find first non-trivial element
	# (the output of IDL may start with empty lines)

	for mods in m:
		if mods != '':
			mods = mods.split(', ')
			break
	else:
		mods = []

	mods.insert(0,name)

# Start of the html page. The source code is written inside a <pre>
# block, which is closed again after the last line of code.

print      \
	'<html>\n'      \
	'<head>\n'      \
	'<title>www_help version 2.6</title>\n'  \
	'</head>\n\n'   \
	'<body>\n'      \
	'<pre>'

first_line = True		# Cleared by the first call to print_line
first_code = True		# Cleared once a declaration line has been dealt with
show_code  = True		# Switched by print_line on the omit-lines markers

# 'line' still holds the line on which the module name was found. If it
# is not a comment line it is the declaration line itself (found in
# front of the header) and is printed here. If it is a comment line
# (name found inside the header) nothing is printed yet.

if line[0] != comment:
	prefix = code_color
	plast  = len(line)-1
	first_line = print_line(first_line,prefix,line,name)
	first_code = False

line = read_next_line(f_in)

in_comment = 0			# Inside a comment block outside the header
in_calls   = 0			# Inside the CALLS section of the header
has_calls  = auto_calls	# A list of called modules ('mods') is available
in_code    = 0			# Set once the end of the header has been passed

# Main output loop. Starting with the line following the one on which
# the module was found, lines are colored and printed until the start
# of the next routine (a new header or another declaration line) or
# the end of the file is reached.

while len(line) > 0:

	parts = line.lower().split()

	if start_header(line):		# Entering header
		in_header = 1
		if in_code:				# A header was already completed: this one
			in_code = 0			# belongs to the next routine. Done!
			break

	elif line.find('CACK_OOPS_!@#$%^&') == 0:
		break

	if len(parts) > 1:
		part0 = parts[0].strip()
		part1 = parts[1].strip()

		# Look for lines like "PRO procedure_name"
		# ('done' is set to 1 just before each break out of the loop
		# below and to 0 if the loop runs to completion)

		for p in program:

			# The following test is never true in a comment
			# (then part0 would be the comment character)

			if part0 == p:		# Never happens in a comment line

				# part0 is a function/procedure declaration

				if part1.find(low_name) != 0:

					# part1 is not the current function name. This should not
					# happen if first_code=True (because then the current line
					# is the very line from which the name was extracted)

					if first_code:
						print 'misspelled function name "'+low_name+'"? in line:'
						print line
						sys.exit()

					# part1 is not the current function name, so it must be
					# the start of another function. Done!

					done = 1
					#if not first_code:
					#	done = 1

					break

				if len(part1) == len(name):

					# part1 is the current function. This must be the line
					# from which the current name was extracted (first_code=True)
					# or this is a duplicate function declaration.

					if not first_code:
						print 'duplicate function?: ', part0, ' ',part1
						sys.exit()

				elif part1[len(name)].isalnum():

					# part1 starts with the current function name, but has
					# additional alphanumeric characters. Must be the start of
					# a new function.

					first_code = False
					done = 1

					break

		else:
			done = 0

		if done:
			break

	# Without a list from IDL the called modules are collected from the
	# CALLS section of the header (needs the list of valid header keys
	# to detect the end of that section).

	if islist and not auto_calls:
		if in_header:
			if line.find(comment+' CALLS:\n') == 0:
				in_calls = 1			# Found CALLS key in header
				mods = [name]			# Initialize array of called modules
			elif in_calls:
				if list.count(line):	# Found next header key: exit CALLS section
					in_calls = 0
					has_calls = len(mods) > 0	# mods array could be empty
				else:
					new = line[1:].split(',')	# Drop comment char; names are comma-separated
					for n in range(len(new)):
						new[n] = new[n].strip()
						if len(new[n]) > 0:
							mods.append(new[n])

	# Lines are written to html file if
	#   - line not part of header (could still be a comment), or
	#   - line in header but does not start with comment character.

	if not in_header or line[0] != comment:
		prefix  = ''
		plast   = len(line)-1		# Position of the last character of the line

		if line[0] == comment:		# Line starts with comment char

			# If line is the first line of a comment block outside header,
			# then switch to the comment color (text_color) before writing
			# the line.

			if not in_header and not in_comment:
				prefix = text_color
				in_comment = 1
			else:
				prefix = ''

		else:						# Line does not start with comment char

			# If line is the first line following a comment block outside
			# header, then switch back to the code color (code_color)
			# before writing the line.

			if not in_header and in_comment:
				prefix = code_color
				in_comment = 0
			elif first_line:
				prefix = code_color

			# The line could still contain a comment at the end.
			# Look for the secondary comment character following an even
			# number of single quotes.

			find_c = 1
			p = -1
			while find_c:
				p0 = p

				p = line.find(comment2,p0+1)
				if p == -1:			# No secondary comment
					break

				find_c = line[0:p].count("'") % 2	# Odd: inside a string, keep looking

				if not find_c and comment2 == ';':

					# Found a secondary comment char following an even number of
					# quotes in IDL code. We still need to check for character
					# combinations like &gt;, i.e. an IDL comment character
					# preceded very closely (no more than 'gap' characters) by
					# an ampersand. The semi-colon was put in by cgi.escape and
					# should not be interpreted as the start of an IDL comment.

					amp = line[0:p].rfind('&')
					amp = p-1-amp       # Chars between ampersand and semicolon

					# If there is no ampersand, rfind returns -1 and amp equals p.
					# NOTE(review): a ';' at position <= gap on a line without
					# ampersand is then skipped too - confirm this is intended.

					find_c = amp != p-2 and amp != 0 and amp <= gap

			else:					# Executed if find_c = 0

				plast = p			# Location of secondary comment char

				# Put the trailing comment in the comment color. The last
				# character of the line is replaced by a newline behind the tags.

				line = line[0:p]+end_color+text_color+line[p:len(line)-1]+end_color+code_color+'\n'

		if not in_comment:

			# Language-specific coloring (updates plast for the inserted tags)

			if extension == '.f':
				line = fortran_syntax(line)
			elif extension == '.pro':
				line = idl_syntax(line)

			# Look for entries in the 'mods' list.
			# The search is case-insensitive if the list came from IDL.

			if has_calls:

				for m in mods:						# Loop over all CALLS modules
					if auto_calls:
						p = line.lower().rfind(m.lower(),0,plast+1)	# Right-most occurrence of 'm'
					else:
						p = line.rfind(m,0,plast+1)	# Right-most occurrence of 'm'

					while p != -1:					# m found
						ok = p == 0					# m at beginning of line ?
						if not ok:					# If not, check that preceding char is not alphanumeric
							ok = not is_alnum(line[p-1])

						if ok:
							ok = p+len(m) == plast	# m at end of line ?
							if not ok:				# If not, check that following char is not alphanumeric
								ok = not is_alnum(line[p+len(m)])

							if ok:					# Bracket m with color changes
								if not first_line:	# (not while first_line is still set)
									line  = line[0:p]+end_color+mark_color+line[p:p+len(m)]+end_color+code_color+line[p+len(m):]
									plast = plast+len(end_color)+len(mark_color)+len(end_color)+len(code_color)

						if auto_calls:
							p = line.lower().rfind(m.lower(),0,p)# Look for another m earlier in the line
						else:
							p = line.rfind(m,0,p)	# Look for another m earlier in the line

		first_line = print_line(first_line,prefix,line,name)	# print_line returns 0


	if end_header(line):		# Exiting header
		in_header = 0
		in_code   = 1			# From here on a new header ends the output


	line = read_next_line(f_in)

f_in.close()


print '</pre>\n</body>\n</html>'