#! /usr/bin/env python
"""Filter and print a YAML reference list (publications) by keyword,
category, attribute and regular expression; optionally emit summary
tables (per country, per category, per feature, per year)."""

import sys, os
import re
from reference_class import *
import tiny_bits

# Script base name (no directory, no extension); used to label log output.
say_str = os.path.splitext(os.path.split(__file__)[1])[0]

def split_regex( regex_string ):
    """
    #+
    # NAME:
    #	split_regex
    # PURPOSE:
    #	Interpret regular expression
    # INPUTS:
    #	regex_string	string
    #			comma separated list of key=value pairs.
    #			If the key is omitted (i.e. only the value is specified) then
    #			the key name is assumed to be 'key'.
    #			The value is optionally bracketed by single or double quotes
    #
    #			key names 'key' and 'cat' are used to filter the topkeys list
    # OUTPUTS:
    #	result		dictionary of key-value pairs
    # EXAMPLE:
    #	input:			output:
    #	value1			{'key': 'value1'}
    #	key=value1		{'key': 'value1'}
    #	k=value1,value2		{'k': 'value1', 'key': 'value2'}
    #
    #	'cat','key','attr' have special meaning.
    #	All others refer to entries associated with
    #	with publications: author, title, etc.
    #-
    """
    say = tiny_bits.say('%s.%s'%(say_str,tiny_bits.whoami()))

    regex_map = dict()
    if regex_string != '':
        string = ','+regex_string+','   # Add leading and trailing comma

        # Pattern, one key=value pair per match:
        #   start with comma
        #   (optional) anything that is not an equal sign or a comma, up to equal sign
        #   double quote, single quote, or null
        #   (optional) anything
        #   closing double quote, single quote, or null (backreference \2)
        #   end with comma
        r = re.compile(r',([^=,]+=)?("|\'|)(.+?)\2,')

        say.debug( string )
        m = r.match(string)
        while m:
            # Group 1 is 'name=' (strip the '='); absent group means default key 'key'.
            key = m.group(1)[0:-1] if m.group(1) else 'key'
            val = m.group(3)
            regex_map[key] = val
            say.debug( "%s -> %s"%(key,val) )
            if m.end(0) == len(string):
                break
            # Re-anchor on the trailing comma of the previous match so it
            # doubles as the leading comma of the next pair.
            string = string[m.end(0)-1:]
            say.debug( string )
            m = r.match(string)

    say.say( 'regex map %s'%regex_map )
    return regex_map

if __name__ == '__main__':

    from optparse import OptionParser

    # Pick defaults from the ECLIPSE tree if present, else the SMEI tree, else none.
    default_keyword_stack = \
        os.path.join( os.environ['ECLIPSE'], 'reports','etc','data-papers.keys'  ) if 'ECLIPSE' in os.environ else \
        os.path.join( os.environ['SMEI'   ], 'ucsd','gen','etc','solar_system.keys' ) if 'SMEI' in os.environ else \
        ''
    default_reference_list = \
        os.path.join( os.environ['ECLIPSE'], 'reports','etc','data-papers.yaml') if 'ECLIPSE' in os.environ else \
        os.path.join( os.environ['SMEI'   ], 'ucsd','gen','etc','solar_system.yaml' ) if 'SMEI' in os.environ else \
        ''

    version = '0.00'
    usage = "%prog \n" + \
        ("\tdefault ReferenceList: %s\n"%default_reference_list if default_reference_list != '' else '') + \
        ("\tdefault KeywordStack : %s\n"%default_keyword_stack  if default_keyword_stack  != '' else '') + \
        "\t--format html-par[=html-root,head-file,tail-file] |\n" + \
        "\t html-list[=html-root,head-file,tail-file] |\n" + \
        "\t xinc-par | xinc-list | simple | bibtex | raw"

    parser = OptionParser(usage=usage,version=version)

    parser.add_option('-v', '--verbose', dest='verbose',
        action='store_true', default=False,
        help='verbose output')
    parser.add_option('', '--debug', dest='debug',
        action='store', type='int', default=0,
        help='set debug level')
    parser.add_option('-n', '--dry-run', dest='dryrun',
        action='store_true', default=False,
        help='make dryrun')
    parser.add_option('', '--start-time', dest='start_time',
        action='store', type='string', default=None,
        help='use only pubs later than start date YYYY-MM (inclusive)')
    parser.add_option('', '--stop-time', dest='stop_time',
        action='store', type='string', default=None,
        help='use only pubs earlier than stop date YYYY-MM (exclusive)')
    parser.add_option('', '--keyword-file', dest='keyword_file',
        action='store', type='string', default=default_keyword_stack,
        help='YAML file with categorization of top keys')
    parser.add_option('', '--show-keywords', dest='show_keywords',
        action='store_true', default=False,
        help='show layout of keyword categorization')
    parser.add_option('-a', '--attributes', dest='attributes',
        action='store', type='string', default='',
        help='paper attributes in form attr1=True|False,attr2=True|False')
    parser.add_option('-f', '--find-regex', dest='find_regex',
        action='store', type='string', default='',
        help='use only refs matching specified regex')
    parser.add_option('-p', '--print-references', dest='print_references',
        action='store_true', default=False,
        help='print references')
    parser.add_option('', '--format', dest='format',
        action='store', type='string', default='simple',
        help='defines format for printing references: simple, raw, bibtex, html-par, html-list, xinc-par, xinc-list')
    # authors_by_country_detail: 'country of origin of authors on papers using CAIDA data'
    # papers_by_category_detail: 'published papers using CAIDA data'
    parser.add_option('', '--title', dest='title',
        action='store', type='string', default='',
        help='title for html page')
    parser.add_option('', '--per-year', dest='per_year',
        action='store_true', default=False,
        help='print reference count for each year')
    parser.add_option('', '--authors-by-country-summary', dest='authors_by_country_summary',
        action='store_true', default=False,
        help='write html page to stdout with table for nr of authors per country')
    parser.add_option('', '--authors-by-country-detail', dest='authors_by_country_detail',
        action='store_true', default=False,
        help='write table for nr of authors per country per year and month')
    parser.add_option('', '--papers-by-category-detail', dest='papers_by_category_detail',
        action='store_true', default=False,
        help='write table with nr of papers per category per year and month')
    parser.add_option('', '--papers-by-feature-detail', dest='papers_by_feature_detail',
        action='store_true', default=False,
        help='write table with nr of papers per feature per year and month')
    parser.add_option('', '--papers-summary', dest='papers_summary',
        action='store_true', default=False,
        help='writes table with nr of papers per year and month')
    parser.add_option('', '--disjoint-categories', dest='disjoint_cat',
        action='store_true', default=False,
        help='count only the first category for a paper')

    options, args = parser.parse_args()

    # First positional argument overrides the default reference list.
    pubfile = args[0] if len(args) > 0 else default_reference_list

    # BUGFIX: these previously called 'parse.error', an undefined name,
    # which would raise NameError instead of printing the usage message.
    if options.keyword_file == '':
        parser.error( 'no KeywordStack file specified' )
    if pubfile == '':
        parser.error( 'no ReferenceList file specified' )

    say = tiny_bits.say(
        label   = os.path.splitext(os.path.split(__file__)[1])[0],
        verbose = max(options.verbose,options.debug),
        dryrun  = options.dryrun,
    )

    regex_map = split_regex( options.find_regex )

    topkeys = KeywordStack( options.keyword_file )
    try:
        topkeys.filter_by_regex(regex_map)            # Retain categories/keywords matching regex
        # TODO: Need more checking here to make sure that attributes exist in keyword_file
        topkeys.filter_attributes(options.attributes) # Only retain topkeys with the specified attributes
    except Exception:   # narrowed from bare 'except:'; still converted to ReferenceError
        raise ReferenceError( '%s, topkeys not available'%tiny_bits.whoami() )

    if options.show_keywords:
        # Display the full keyword tree (including empty/attribute entries) and quit.
        topkeys.keep_zero = True
        topkeys.keep_attr = True
        say.yell( topkeys.__str__() )
        sys.exit()

    lst = ReferenceList(pubfile,options.start_time,options.stop_time) # Read YAML file
    lst = lst.filter_keywords(topkeys)                                # Restrict keywords
    lst = lst.filter_by_regex(regex_map)                              # Restrict by regex

    if say.DRYRUN:
        say.done()

    if options.authors_by_country_summary:
        lst.set_html_mode (options.format)
        lst.set_title     (options.attributes)
        lst.authors_by_country_summary()
    elif options.authors_by_country_detail:
        lst.set_title(options.title)
        lst.authors_by_country_detail()
    elif options.papers_summary:
        lst.set_html_mode (options.format)
        lst.set_title     (options.attributes)
        lst.papers_summary(topkeys)
    elif options.papers_by_category_detail:
        lst.set_title(options.title)
        lst.papers_by_category_detail(topkeys, options.disjoint_cat)
    elif options.papers_by_feature_detail:
        lst.set_title(options.title)
        lst.papers_by_feature_detail(topkeys)
    elif options.print_references:
        if options.format[0:3] == 'raw':
            print(lst.__repr__())
        elif options.format[0:4] in ['html','xinc']:
            lst.set_html_mode(options.format[0:4])
            if options.format[4:8] == '-par':
                lst.set_html_paragraph(True)
            else:   # options.format[4:8] == '-list'
                lst.set_html_list(True)
            html_head = ''
            html_tail = ''
            # 'html-par=root,head,tail' syntax: optional root dir plus
            # head/tail boilerplate files wrapped around the output.
            if options.format[0:4] == 'html' and '=' in options.format:
                html,files = options.format.split('=')
                files = files.split(',')
                if len(files) > 0:
                    lst.set_html_root(files[0])
                    say.say( "html root directory is '%s'"%files[0] )
                if len(files) > 1:
                    # [0:-1] drops the trailing newline; 'with' closes the handle.
                    with open(files[1]) as fp:
                        html_head = fp.read()[0:-1]
                    say.say( "html head file is '%s'"%files[1] )
                if len(files) > 2:
                    with open(files[2]) as fp:
                        html_tail = fp.read()[0:-1]
                    say.say( "html tail file is '%s'"%files[2] )
            print(lst.custom_print(html_head,html_tail))
        elif options.format == 'bibtex':
            print(lst.print_bibtex(topkeys))
        elif options.format == 'test':
            for x in lst.atoms:
                say.yell( "----------------> %s"%x.hash['MARKER'] )
                print(x.__repr__())
        else:
            print(lst)
    elif options.per_year:
        # One keyword-count table per calendar year in the list's time span.
        topkeys.count_atoms(lst)
        for year in range(lst.start_time.get(attr='year'),lst.stop_time.get(attr='year')+1):
            sub_topkeys = topkeys.copy(year)
            sub_topkeys.category_only = True
            sub_topkeys.title = 'in %s'%year
            print(sub_topkeys)
    else:
        topkeys.count_atoms(lst)
        topkeys.category_only = False
        print(topkeys)

    sys.exit(0)