# Keith Briggs 2017-07-15 - added latex output

from sys import stderr,path,exit
import re
from regnal_year_03 import parse_regnal_year_spec,get_regnalyear,int_to_roman

# Qualifier code -> English phrase used when building the "wordy" rendering.
# Every non-empty phrase ends with a space so that it can be followed
# directly by the date it qualifies.
wordy_expander=dict((
  ('',      ''),
  # before / after / approximate
  ('<',     'before '),
  ('>',     'after '),
  ('circa', 'circa '),
  ('C',     'circa '),
  # early / middle / late
  ('E',     'early '),
  ('M',     'middle of the '),
  ('L',     'late '),
  ('EM',    'early to middle '),
  ('ML',    'middle to late '),
  # thirds
  ('1T',    'first third of the '),
  ('2T',    'middle third of the '),
  ('3T',    'last third of the '),
  # quarters
  ('1Q',    'first quarter of the '),
  ('2Q',    'second quarter of the '),
  ('3Q',    'third quarter of the '),
  ('4Q',    'fourth quarter of the '),
))

# Amount added to a date's sort key when the date carries one of these
# period prefixes (keys are the upper-cased prefix codes).
eml_shift={
  # early / middle / late, and the two combined forms
  'E': 33.0, 'M': 66.0, 'L': 100.0,
  'EM': 75.0, 'ML': 100.0,
  # quarters
  '1Q': 25.0, '2Q': 50.0, '3Q': 75.0, '4Q': 100.0,
  # thirds
  '1T': 33.0, '2T': 66.0, '3T': 100.0,
}

# Building blocks of the date-descriptor grammar.
# All patterns are raw strings so that regex escapes such as \d and \[ are
# not treated as (invalid) Python string escapes.  Fragments containing %d
# are instantiated twice: with 0 for the first date of a descriptor and with
# 1 for the optional second date of a range.
prenote    =r'(\[(?P<prenote>.*?)\])?'   # optional leading [note]
postnote   =r'(\[(?P<postnote>.*?)\])?'  # optional trailing [note]
circa      =r'(?P<circa%d>(c\.?)|(circa))'
uncertain  =r'(?P<uncertain%d>\?)'
ba         =r'(?P<ba%d>[<>])' # before or after
third      =r'[123]t'
quarter    =r'[1234]q'
eml        =r'(em)|(ml)|[eml]'
prefix     =r'(?P<prefix%s>(%s)|(%s)|(%s))'%('%d',third,quarter,eml,)
simplerange=r'(?P<simplerange>(1\d\d\d[-]\d)|(1\d\d\d[-]\d\d)$)' # post-1000 only
century    =r'(?P<century%d>\d\d?)[Cc]'
decade     =r'(?P<decade%d>\d{2,3}0)s'
year       =r'(?P<year%d>\d{1,4})'
oldstyle   =r'(?P<oldstyle%d>(\d{3}[012345678]/\d)|(\d{3}9/\d{2}))'

# fragments for the first date (group names end in 0)
first_dict={
  'uncertain': uncertain%0,
  'ba':        ba%0,
  'circa':     circa%0,
  'prefix':    prefix%0,
  'year':      year%0,
  'simplerange': simplerange,
  'century':   century%0,
  'decade':    decade%0,
  'oldstyle':  oldstyle%0,
}
# fragments for the second date of a range (group names end in 1)
second_dict={
  'uncertain': uncertain%1,
  'ba':        ba%1,
  'circa':     circa%1,
  'prefix':    prefix%1,
  'year':      year%1,
  'century':   century%1,
  'decade':    decade%1,
  'oldstyle':  oldstyle%1,
}
first =r'{uncertain}?{ba}?{circa}?{prefix}?({simplerange}|{oldstyle}|{decade}|{year}|{century})'.format(**first_dict)
second=r'{uncertain}?{ba}?{circa}?{prefix}?({oldstyle}|{decade}|{year}|{century})'.format(**second_dict)
# the second date is optional and must be introduced by a range separator: x, - or --
second=r'((?P<rangesep>([x-])|([-]{2}))'+second+r')?'
dd=prenote+first+second+postnote+'$'
re_dd=re.compile(dd,flags=re.IGNORECASE)
re_spaces=re.compile(r'(\s{2,})') # runs of two or more whitespace characters
# A literal "c." marker followed by the shortest text that ends just before
# a range separator or the end of the string.  The dot is escaped so that
# "c" followed by an arbitrary character (e.g. "ch" in a note) does not match.
re_circa=re.compile(r'c\.(.*?)(?=$|-|x)') # lookahead assertion

def simplerange_to_float(x):
  ''' Return the end year of an abbreviated range 'YYYY-D' or 'YYYY-DD' as a float.

  x must contain exactly one '-'.  A one-character tail is appended to the
  first three characters of the start year; any other tail is appended to
  the first two, e.g. '1234-5' -> 1235.0 and '1234-56' -> 1256.0.
  '''
  start,tail=x.split('-')
  kept=3 if len(tail)==1 else 2 # number of leading start-year characters to keep
  return float(start[:kept]+tail)

class DD:
  ''' Date descriptor.

  Parses one date specification string and exposes:
    ok      - True if the input was understood
    clean   - normalized compact form
    wordy   - English phrase
    latex   - LaTeX form
    sortkey - float used for ordering; -1.0 means "not defined"
  All of these (and dd, the exact input) are set on every instance, so the
  getters and repr() are safe to call even when ok is False.
  Instances order by sortkey (only __lt__ is defined).
  '''
  def __init__(s,dd,verbose=False):
    ''' dd is the descriptor text; verbose=True prints parsing details. '''
    s.verbose=verbose
    s.dd=dd # save exact input
    s.ok=False
    s.clean=''
    s.wordy=''
    s.latex=''
    s.sortkey=-1.0 # indicates not yet defined
    s.regnalyear=None,None,None
    if not dd: return # empty input
    dds=dd.strip()
    # special case: regnal year (these cannot be uncertain)...
    ry=parse_regnal_year_spec(dds)
    if ry:
      s._init_regnal(ry)
      return
    # general case...
    s._init_general(dds)

  def _init_regnal(s,ry):
    ' fill in the instance from a parsed regnal-year spec (or the string "error") '
    if ry=='error':
      # leave ok False and put a visible marker into every rendering
      s.wordy=s.clean=s.latex='DDFIXME(%s)'%s.dd
      return
    if s.verbose: print('ry="%s"'%(ry,))
    s.ok=True
    s.regnalyear=ry
    year,monarch=ry[0],ry[1]+' '+str(ry[2])
    actual_year=get_regnalyear(year,monarch)
    if s.verbose: print('year="%s", monarch="%s", actual_year="%s"'%(year,monarch,actual_year,))
    if s.verbose: print('int_to_roman(ry[2])="%s"'%(int_to_roman(ry[2])))
    monarch_name=(monarch.split())[0]
    s.clean=str(year)+' '+monarch_name+' '+int_to_roman(ry[2])
    if s.verbose: print('s.clean="%s"'%(s.clean,))
    s.wordy=s.clean+' (%s)'%actual_year
    s.latex=s.wordy.replace('<','$<$').replace('>','$>$')
    if '/' in actual_year:
      # old-style double year: sort on the year after the one before the slash
      s.oldstyle=actual_year
      s.sortkey=float(actual_year.split('/',1)[0])+1.0
    else:
      s.sortkey=float(actual_year)

  @staticmethod
  def _ordinal(n):
    ' digit string -> English ordinal, e.g. "1" -> "1st", "12" -> "12th", "22" -> "22nd" '
    k=int(n)
    if 10<=k%100<=20: suffix='th'
    else: suffix={1:'st',2:'nd',3:'rd'}.get(k%10,'th')
    return n+suffix

  def _init_general(s,dds):
    ' fill in the instance by matching dds against the date grammar re_dd '
    m=re_dd.match(dds)
    if not m: return # ok stays False
    s.ok=True
    sortkey_offset0=sortkey_offset1=0.0
    # plain names hold the wordy pieces, c_ names the pieces of the clean form
    prenote=postnote=c_rangesep=rangesep=''
    ba0=circa0=prefix0=year0=decade0=century0=oldstyle0=uncertain0=''
    ba1=circa1=prefix1=year1=decade1=century1=oldstyle1=uncertain1=''
    c_ba0=c_circa0=c_prefix0=c_year0=c_decade0=c_century0=c_oldstyle0=c_uncertain0=''
    c_ba1=c_circa1=c_prefix1=c_year1=c_decade1=c_century1=c_oldstyle1=c_uncertain1=''
    # Each group that sets sortkey overwrites the previous value, so the
    # result depends on visiting order (pattern order on Python 3.7+, where
    # the second date of a range is visited last).
    for key,value in m.groupdict().items():
      if value is None: continue
      if s.verbose: print('key="%s" value="%s"'%(key,value,))
      if key=='simplerange':
        c_year0=year0=value
        s.sortkey=simplerange_to_float(value)
        continue # not break: a later group such as postnote must still be processed
      if key=='prenote':
        prenote=value.strip('[]')
        continue
      if key=='rangesep':
        c_rangesep=value
        rangesep=' to '
        continue
      if key=='postnote':
        postnote=value.strip('[]')
        continue
      if key=='ba0':
        c_ba0=value
        ba0=wordy_expander[value]
        continue
      if key=='ba1':
        c_ba1=value
        ba1=wordy_expander[value]
        continue
      if key=='circa0':
        c_circa0='c.'
        circa0='circa '
        continue
      if key=='circa1':
        c_circa1='c.'
        circa1='circa '
        continue
      if key=='uncertain0':
        c_uncertain0='?'
        uncertain0='perhaps '
        continue
      if key=='uncertain1':
        c_uncertain1='?'
        uncertain1='perhaps '
        continue
      if key=='prefix0':
        c_prefix0=value.lower()
        prefix0=wordy_expander[value.upper()]
        sortkey_offset0=eml_shift[value.upper()]
        continue
      if key=='prefix1':
        c_prefix1=value.lower()
        prefix1=' the '+wordy_expander[value.upper()]
        sortkey_offset1=eml_shift[value.upper()]
        continue
      if key=='decade0':
        c_decade0=decade0=value+'s'
        s.sortkey=float(value)
        continue
      if key=='decade1':
        # NOTE(review): unlike the other second-date groups this one does not
        # update sortkey - confirm whether that is intended.
        c_decade1=decade1=value+'s'
        continue
      if key=='century0':
        s.sortkey=100.0*float(value)-100.0
        c_century0='%sC'%value
        century0=' %s century'%s._ordinal(value)
        continue
      if key=='century1':
        s.sortkey=100.0*float(value)-100.0
        c_century1='%sC'%value
        century1=' %s century'%s._ordinal(value)
        continue
      if key=='year0':
        c_year0=year0=value
        s.sortkey=float(year0)
        continue
      if key=='year1':
        c_year1=year1=value
        s.sortkey=float(year1)
        continue
      if key=='oldstyle0':
        c_oldstyle0=oldstyle0=value
        s.sortkey=float(value[:value.index('/')])+1.0
        continue
      if key=='oldstyle1':
        c_oldstyle1=oldstyle1=value
        s.sortkey=float(value[:value.index('/')])+1.0
        continue
    # a prefix on the second date takes precedence over one on the first
    if sortkey_offset1:
      s.sortkey+=sortkey_offset1
    else:
      s.sortkey+=sortkey_offset0
    # none of the clean pieces contains a space, so they are simply concatenated
    clean=''.join((c_uncertain0,c_ba0,c_circa0,c_prefix0,c_year0,c_century0,c_decade0,c_oldstyle0,c_rangesep,c_uncertain1,c_ba1,c_circa1,c_prefix1,c_year1,c_century1,c_decade1,c_oldstyle1,))
    wordy=' '.join((prenote,uncertain0,ba0,circa0,prefix0,year0,century0,decade0,oldstyle0,rangesep,uncertain1,ba1,circa1,prefix1,year1,century1,decade1,oldstyle1,postnote,))
    wordy=re_spaces.sub(' ',wordy) # empty pieces leave runs of spaces behind
    s.wordy=wordy.replace(' ,',',').strip()
    s.clean=(prenote+' '+clean+' '+postnote).strip().replace(' ,',',')
    # NOTE(review): the substitutions below also act on any note text that is
    # part of s.clean (e.g. an 'x' or '-' inside a note) - confirm intended.
    s.latex=s.clean
    if 'c.' in s.latex: #  \circa
      # the backslash is doubled because an unknown escape such as \c in a
      # replacement template is an error on Python 3.7+
      s.latex,k=re_circa.subn(r'\\circa{\1}',s.latex,count=2)
    if '--' not in s.latex: s.latex=s.latex.replace('-','--')
    s.latex=s.latex.replace('<','$<$').replace('>','$>$').replace('x',r'$\times$')

  def get_ok(s):
    ' True if the input was parsed successfully '
    return s.ok
  def get_sortkey(s):
    ' float sort key; -1.0 if not defined '
    return s.sortkey
  def get_clean(s):
    ' normalized compact form '
    return s.clean
  def get_wordy(s):
    ' English phrase form '
    return s.wordy
  def get_latex(s):
    ' LaTeX form '
    return s.latex
  def __repr__(s):
    return "DD('%s')"%(s.dd,)
  def __str__(s):
    return "DD('%s')"%(s.dd,)
  def __lt__(s,t):
    ' order by sort key '
    return s.sortkey<t.sortkey

def test_01(fn='DD_test_cases.txt'):
  ''' Print a table of DD results for every test case in the file fn.

  The file holds one date descriptor per line; lines starting with '#' and
  empty lines are skipped.  Each remaining line is either reported as
  failed or printed with its sort key and its clean, LaTeX and wordy forms.
  '''
  print('%-20s\t%4s\t%18s\t%-22s\t%s'%('input','sort','normalized output','LaTeX output','verbose output',))
  print('-'*120)
  # the with-block closes the file even when exit() below ends the run early
  with open(fn,'r') as f:
    for line in f:
      if line.startswith('#'): continue
      text=line.strip('\n') # also correct for a last line without a newline
      if not text: continue # a blank line is not a test case
      dd=DD(text)
      if not dd.get_ok():
        print('"%s" failed'%dd)
      else:
        print('%-20s\t%4.0f\t%18s\t%-22s\t%s'%(text,dd.get_sortkey(),dd.get_clean(),dd.get_latex(),dd.get_wordy(),))
        # stop the whole run at the first parsed case whose sort key is below 800
        # NOTE(review): looks like a debugging aid - confirm it is still wanted
        if dd.get_sortkey()<800: exit()

# When this file is run as a script, print the table for the default test-case file.
if __name__=='__main__': test_01()
