# -*- coding: iso-8859-1 -*-
# GNU Solfege - eartraining for GNOME
# Copyright (C) 2001, 2002, 2003  Tom Cato Amundsen
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

"""
prog             The test done before calling
 +statementlist
  +statement
   +assignment   peek: 'NAME', '='
    +faktorlist  scan('NAME') scan('=')
     +faktor
      +atom()  called directly on the first line, then possibly again after +-/%
       +functioncall    peek: 'NAME' '('
        +faktorlist     peek() != ')'
   +block        peek: 'NAME', '{'
    +assignmentlist
    +faktor     peek_type()!= '}'
   +include      peek: 'NAME'("include"), '('
    +prog

assignmentlist peek: 'NAME' '='
+assignment

"""
# On singchord-1 I save about 0.03 by having _peek_type.
# On singchord-1 I save nothing by having a peek2_type(t1, t2)
# that tests the next two tokens.

import sys
import os, os.path
import gettext
import re, string
import i18n

# Token type identifiers.  Each identifier is also available as a
# module-level constant with the same name and value (NAME == 'NAME', ...).
# The constants are bound explicitly instead of being written into
# globals() so that they are visible to readers and static checkers.
tokens = ('NAME', 'STRING', 'OPERATOR', 'INTEGER', 'CHAR', 'EOF')
NAME, STRING, OPERATOR, INTEGER, CHAR, EOF = tokens

# Matches one lexical element at the position it is applied to.  The
# alternatives are tried in the order written; the capturing groups are
#   1: whitespace          2: comment (from '#' to the end of the line)
#   3: integer             4/5: triple-quoted string, with/without quotes
#   6/7: string, with/without quotes                8: name
# Written as a raw string so that the backslash sequences reach the re
# module untouched; the pattern text is unchanged.
NEW_re = re.compile(r'''(?:
                        (\s+)|  #space
                        (\#.*?$)| #comment
                        (-?\d+)| #integer
                        ("""(.*?)""")| #multiline string
                        ("(.*?)")| #string
                        (\w[\w-]*) #name
                )''',
                    re.VERBOSE | re.MULTILINE | re.DOTALL | re.UNICODE)

# The 'lastindex' of a match tells which alternative of NEW_re matched.
# The values are obtained by matching one sample of each kind.  For the two
# string alternatives the quoted group encloses an inner group, and
# lastindex is the number of the enclosing (outer) group.
LI_COMMENT = NEW_re.match("# comment").lastindex
LI_INTEGER = NEW_re.match("-3").lastindex
LI_MSTRING = NEW_re.match('"""string"""').lastindex
LI_STRING = NEW_re.match('"string"').lastindex
LI_NAME = NEW_re.match("name").lastindex

# lastindex of a match -> type of the token to create.
lastindex_to_ID = {
    LI_INTEGER: INTEGER,
    LI_MSTRING: STRING,
    LI_STRING: STRING,
    LI_NAME: NAME,
}

# lastindex of a match -> number of the group that holds the token text.
# For strings this is the inner group, i.e. the text without the quotes.
lastindex_to_group = {
    LI_INTEGER: 3,
    LI_MSTRING: 5,
    LI_STRING: 7,
    LI_NAME: 8,
}


def get_translated_string(dict, name):
    """Return the value stored for name, preferring a translated variant.

    For each language returned by i18n.langs(), in that order, the key
    "name(language)" is tried; the first one present wins.  If none is
    present dict[name] is returned, which raises KeyError when name is
    missing too.
    """
    for lang in i18n.langs():
        translated_key = "%s(%s)" % (name, lang)
        if translated_key in dict:
            return dict[translated_key]
    return dict[name]


class DataparserException(Exception):
    """Base class of the exceptions defined in this module.

    filename -- stored as m_filename
    lexer -- stored as m_lexer; its get_line() method is used to quote
             source lines in error messages.
    """
    def __init__(self, filename, lexer):
        Exception.__init__(self)
        self.m_lexer = lexer
        self.m_filename = filename
    def get_err_context(self, lineno):
        """Return the source lines around lineno, formatted for display.

        The result contains the line before lineno (only when lineno > 0),
        the line itself and the following line (only when it is not
        empty), each on its own line and prefixed with its line number.
        The string ends with a newline.
        """
        get_line = self.m_lexer.get_line
        template = "\n(line %i): %s"
        parts = []
        if lineno > 0:
            parts.append(template % (lineno - 1, get_line(lineno - 1)))
        parts.append(template % (lineno, get_line(lineno)))
        following = get_line(lineno + 1)
        if following:
            parts.append(template % (lineno + 1, following))
        parts.append("\n")
        return "".join(parts)


class NameLookupException(DataparserException):
    """Raised when a name used in a data file is not known.

    name -- the unknown name, stored as m_name
    parser -- the parser that found the problem; its m_filename and
              _lexer are recorded for the error message.
    """
    def __init__(self, name, parser):
        DataparserException.__init__(self, parser.m_filename, parser._lexer)
        self.m_name = name
    def __str__(self):
        lexer = self.m_lexer
        # Element 3 of a token tuple is the line number of the token.
        lineno = lexer.m_tokens[lexer.pos][3]
        headline = "Unknown name '%s' in line %i of file '%s':" % (
            self.m_name, lineno, self.m_filename)
        return headline + self.get_err_context(lineno)


class SyntaxError(DataparserException):
    """Raised when the tokens of a data file do not follow the grammar.

    NOTE: inside this module the name shadows the built-in SyntaxError.

    parser -- the parser that found the problem; its m_filename and
              _lexer are recorded for the error message.
    msg -- description of the problem, stored as m_msg
    """
    def __init__(self, parser, msg=""):
        DataparserException.__init__(self, parser.m_filename, parser._lexer)
        self.m_msg = msg
    def __str__(self):
        lexer = self.m_lexer
        # Element 3 of a token tuple is the line number of the token.
        lineno = lexer.m_tokens[lexer.pos][3]
        headline = "Syntax error in line %i of file '%s': %s" % (
            lineno, self.m_filename, self.m_msg)
        return headline + self.get_err_context(lineno)


class UnableToTokenizeException(DataparserException):
    """Raised by the lexer when the input cannot be split into tokens.

    lexer -- the lexer that failed, used to quote the source lines
    lineno -- line number where tokenizing stopped, stored as m_lineno
    tok -- the text that was not recognised, stored as m_tok

    No file name is known at this level, so m_filename is None.
    """
    def __init__(self, lexer, lineno, tok):
        DataparserException.__init__(self, None, lexer)
        self.m_lineno = lineno
        self.m_tok = tok
    def __str__(self):
        ret = "In line %i of input: does not recognise this string '%s' as a valid token." % (self.m_lineno, self.m_tok)
        # Quote the lines around the offending line itself.  Using the line
        # of the last token produced would show the wrong lines whenever
        # that token is on an earlier line, and nothing at all when the
        # error is in the very first token.
        ret += self.get_err_context(self.m_lineno)
        return ret


class Lexer:
    """Split the text of a data file into tokens.

    The tokens are stored in self.m_tokens as tuples
    (type, string, position in the source, line number); line 0 is the
    first line.  The characters {}=%+,/() are tokens of their own whose
    type is the character itself; integers, strings and names get the type
    given by lastindex_to_ID.  Four 'EOF' tokens are appended so that
    peeking a few tokens ahead never runs past the end of the list.

    self.pos is the index of the next token to be scanned.

    scan() reports a mismatch with SyntaxError(self.m_parser, ...); the
    attribute m_parser is not set here and has to be assigned by the owner
    of the lexer.
    """
    def __init__(self, src):
        """Tokenize src.

        src is normally a byte string.  It is decoded with the encoding
        named in a 'coding' comment at the very start of the text, or as
        UTF-8 when there is none.  Text that is already unicode is used
        as it is.

        Raises UnableToTokenizeException when a part of the text is not
        a valid token.
        """
        if not isinstance(src, unicode):
            coding_re = re.compile(r"#.*?coding\s*[:=]\s*([\w_.-]+)")
            m = coding_re.match(src)
            if m:
                src = unicode(src, m.group(1), errors="replace")
            else:
                src = unicode(src, "UTF-8", errors="replace")
        self.m_src = src
        self.m_tokens = []
        self.pos = 0
        pos = 0
        lineno = 0
        src_len = len(src)
        while pos < src_len:
            ch = src[pos]
            if ch in " \t":
                pos += 1
                continue
            if ch == '\n':
                pos += 1
                lineno += 1
                continue
            if ch in "{}=%+,/()":
                self.m_tokens.append((ch, ch, pos, lineno))
                pos += 1
                continue
            m = NEW_re.match(src, pos)
            if not m:
                raise UnableToTokenizeException(self, lineno, ch)
            # Comments and whitespace that was not handled above (for
            # example the '\r' of DOS line endings) match NEW_re too but
            # have no entry in lastindex_to_ID; they produce no token.
            if m.lastindex in lastindex_to_ID:
                self.m_tokens.append((lastindex_to_ID[m.lastindex],
                         m.group(lastindex_to_group[m.lastindex]), pos, lineno))
            # Strings and whitespace runs can span several lines; count
            # their newlines so that later tokens get the right line.
            lineno += m.group().count('\n')
            pos = m.end()
        # The EOF tokens carry the end position and the last line number
        # so that an error detected at the end of the input can be
        # reported like any other error.
        for dummy in range(4):
            self.m_tokens.append(("EOF", None, pos, lineno))
    def peek(self, forward=0):
        """Return the token tuple 'forward' tokens ahead, without scanning."""
        return self.m_tokens[self.pos+forward]
    def peek_type(self, forward=0):
        """Return the type of the token 'forward' tokens ahead."""
        return self.m_tokens[self.pos+forward][0]
    def peek_string(self, forward=0):
        """Return the string of the token 'forward' tokens ahead."""
        return self.m_tokens[self.pos+forward][1]
    def scan_any(self):
        """scan the next token"""
        self.pos += 1
        return self.m_tokens[self.pos-1][1]
    def scan(self, t=None):
        """t is the type of token we expect

        Return the string of the token and move past it.  Raise
        SyntaxError if the next token is of another type.
        """
        tok = self.m_tokens[self.pos]
        if tok[0] == t:
            self.pos += 1
            return tok[1]
        raise SyntaxError(self.m_parser, "Token '%s' not found, found '%s' of type %s " % (t, tok[1], tok[0]))
    def get_line(self, lineno):
        """line 0 is the first line
        Return an empty string if lineno is out of range.
        The returned line does not include the newline character.
        """
        src = self.m_src
        start = 0
        skipped = 0
        while skipped < lineno:
            newline = src.find('\n', start)
            if newline == -1:
                # The text has fewer lines than asked for.
                return src[0:0]
            start = newline + 1
            skipped += 1
        # Search for the end from 'start' itself, not from the character
        # after it, so that an empty line is returned as an empty string.
        end = src.find('\n', start)
        if end == -1:
            end = len(src)
        return src[start:end]


class Dataparser:
    """Recursive descent parser for the data file format outlined in the
    docstring of this module.

    globals -- dict used as the top-level namespace.  Top-level
               assignments are stored in it, so a dict passed in by the
               caller is modified.  A new dict is created when omitted.
    function_dict -- maps the names that can be called in a data file to
                     the callables to run.
    gd -- names that are copied from globals into every question block
          that does not assign them itself.

    After parsing, self.header holds the assignments made in header
    blocks and self.questions holds one dict per question block.
    """
    def __init__(self, globals=None, function_dict=None, gd=None):
        # Fresh containers for every parser: mutable default arguments
        # would be shared, and written to, by all parsers created without
        # arguments.
        if globals is None:
            globals = {}
        if function_dict is None:
            function_dict = {}
        if gd is None:
            gd = []
        self.gd = gd
        self.globals = globals
        self.functions = function_dict
        self.header = {}
        self.questions = []
        # The dict that assignments are currently stored in.
        self.context = self.globals
    def parse_file(self, filename):
        """We always construct a new parser if we want to parse another
        file. So this method is never called twice for one parser.
        """
        self.m_filename = filename
        infile = open(filename, 'r')
        try:
            src = infile.read()
        finally:
            infile.close()
        self._lexer = Lexer(src)
        self._lexer.m_parser = self
        self.reserved_words = ('_', 'question', 'header')
        self.prog()
    def prog(self):
        """prog: statementlist EOF"""
        self.statementlist()
        self._lexer.scan('EOF')
    def statementlist(self):
        """statementlist: (statement+)"""
        while self._lexer.peek_type() == 'NAME':
            self.statement()
    def statement(self):
        """statement: assignment | block | include"""
        if self._lexer.peek_type(1) == '=':
            self.assignment()
        elif self._lexer.peek_type(1) == '{':
            self.block()
        elif self._lexer.peek_type() == 'NAME' \
                and self._lexer.peek_string() == 'include' \
                and self._lexer.peek_type(1) == '(':
            self.include()
        else:
            raise SyntaxError(self, "Expected token '=' or '{', found  '%s' of type %s" % (self._lexer.peek_string(), self._lexer.peek_type() ))
    def include(self):
        """include: "include" "(" (STRING | NAME) ")"

        Parse the named file, which is looked up relative to the directory
        of self.m_filename.  Header blocks of the included file are thrown
        away; everything else ends up in this parser.
        """
        self._lexer.scan_any() # scan include
        self._lexer.scan_any() # scan (
        try:
            filename = self._lexer.scan('STRING')
        except SyntaxError:
            sys.stderr.write("Warning: The file '%s' uses old style syntax for the include command." % self.m_filename + "\n")
            sys.stderr.write('This is not fatal now but will be in the future. You should change the code\nfrom include(filename) to include("filename")\n' + "\n")
            filename = self._lexer.scan('NAME')
        ifile = open(os.path.join(os.path.dirname(self.m_filename),
                                  filename), 'r')
        try:
            src = ifile.read()
        finally:
            ifile.close()
        old_lexer = self._lexer
        # don't let the new file pollute my header!
        old_header = self.header
        self.header = {}
        self._lexer = Lexer(src)
        # The lexer needs the parser to be able to report syntax errors.
        self._lexer.m_parser = self
        self.prog()
        self._lexer = old_lexer
        self.header = old_header
        self._lexer.scan(')')
    def assignmentlist(self):
        """assignmentlist: (assignment+) """
        # FIXME peek(1) is added because of the music shortcut
        while self._lexer.peek_type() == 'NAME' and self._lexer.peek_type(1) == '=':
            self.assignment()
    def assignment(self):
        """NAME "=" faktor ("," faktor)*

        Store the value in the current context.  A single faktor is stored
        as it is, several are stored as a list.
        """
        name = self._lexer.scan_any()#('NAME')
        if name in self.reserved_words:
            raise SyntaxError(self, "Assignment to reserved word '%s'" % name)
        self._lexer.scan_any()#('=')
        faktorlist = self.faktorlist()
        if len(faktorlist) == 1:
            self.context[name] = faktorlist[0]
        else:
            self.context[name] = faktorlist
    def faktor(self):
        """faktor: atom
              ("+" atom
              |"-" atom
              |"/" atom
              |"%" atom
              )*

        "+", "-" and "%" apply the Python operator to the two values;
        "/" builds the tuple (left, right).
        """
        faktor = self.atom()
        while 1:
            peek = self._lexer.peek_type()
            # 'x = x op y' is used instead of 'x op= y' because the left
            # value can be a list that is stored in the context, and the
            # augmented form would change that list in place.
            if peek == '+':
                self._lexer.scan_any()
                faktor = faktor + self.atom()
            elif peek == '-':
                self._lexer.scan_any()
                faktor = faktor - self.atom()
            elif peek == '/':
                self._lexer.scan_any()
                faktor = (faktor, self.atom())
            elif peek == '%':
                self._lexer.scan_any()
                faktor = faktor % self.atom()
            else:
                break
        return faktor
    def faktorlist(self):
        """faktorlist: faktor ("," faktor)* """
        faktorlist = [self.faktor()]
        while self._lexer.peek_type() == ',':
            self._lexer.scan_any()
            faktorlist.append(self.faktor())
        return faktorlist
    def atom(self):
        """atom: INTEGER | STRING | NAME | FUNCTIONCALL"""
        peek = self._lexer.peek_type()
        if peek == 'STRING':
            return self._lexer.scan('STRING')
        elif peek == 'INTEGER':
            return int(self._lexer.scan('INTEGER'))
        elif peek == 'NAME':
            if self._lexer.peek_type(1) == '(':
                return self.functioncall()
            return self.lookup_name(self._lexer.scan('NAME'))
        else:
            raise SyntaxError(self, "Expected STRING, INTEGER or NAME+'(', found '%s' of type %s" % (self._lexer.peek_string(), self._lexer.peek_type()))
    def functioncall(self):
        """functioncall: NAME "(" faktorlist ")"

        The argument list may be empty.  Return what the function returns.
        Raise NameLookupException if the function is not known.
        """
        name = self._lexer.scan_any()#'NAME')
        self._lexer.scan('(')
        if self._lexer.peek_type() == ')':
            # functioncall()
            arglist = []
        else:
            # functioncall(arglist)
            arglist = self.faktorlist()
        self._lexer.scan(')')
        # Only the lookup is guarded, so that a KeyError raised inside the
        # called function is not reported as an unknown name.
        try:
            func = self.functions[name]
        except KeyError:
            raise NameLookupException(name, self)
        return func(*arglist)
    def block(self):
        """block: NAME "{" assignmentlist "}" """
        name = self._lexer.scan_any()
        if name == 'header':
            self.context = self.header
        elif name == 'question':
            self.questions.append({})
            self.context = self.questions[-1]
        else:
            raise SyntaxError(self,  "Unknown block name '%s'" % name)
        self._lexer.scan_any() # scan '{'
        self.assignmentlist()
        # The question block is a little more code because of the shortcut
        # we allow: question { "music string" }
        if name == 'question' and self._lexer.peek_type() != '}':
            self.context['music'] = self.faktor()
        self._lexer.scan("}")
        if name == 'question': #FIXME this is code I want to remove.
            for n in self.gd:
                if n not in self.context:
                    self.context[n] = self.globals[n]
        self.context = self.globals
    def lookup_name(self, name):
        """Return the value of name from the current context or, if it is
        not found there, from the globals.

        Raise NameLookupException if the name is in neither of them.
        """
        if name in self.context:
            return self.context[name]
        elif name in self.globals:
            return self.globals[name]
        else:
            raise NameLookupException(name, self)


def test_tokenizer():
    """Print the tokens of the file named by the first command line
    argument, one per line and prefixed with the token index.

    Exits with status -1 when no argument is given.
    """
    if len(sys.argv) == 1:
        print("Give the file to parse as command line argument.")
        sys.exit(-1)
    infile = open(sys.argv[1], 'r')
    try:
        # The lexer takes the text of the file, not the file object.
        src = infile.read()
    finally:
        infile.close()
    lexer = Lexer(src)
    for i, t in enumerate(lexer.m_tokens):
        print("%s %s" % (i, t))


def _make_test_parser():
    """Return a new Dataparser set up with the names main() parses with."""
    # NOTE(review): '_' is not defined in this file; presumably it is
    # installed as a builtin by the i18n/gettext setup -- verify.
    return Dataparser({'dictation': 'dictation',
                       'progression': 'progression',
                       'harmony': 'harmony',
                       'sing-chord': 'sing-chord',
                       'chord-voicing': 'chord-voicing',
                       'chord': 'chord',
                       'id-by-name': 'id-by-name',
                       'satb': 'satb',
                       'horiz': 'horiz',
                       'vertic': 'vertic',
                       'yes': 1,
                       'no': 0,
                       'accidentals': 'accidentals',
                       'key': 'key',
                       'semitones': 'semitones',
                       'tempo': (60, 4)}, {'_': _})


def main():
    """Command line entry point.

    -l     time 300 runs of the lexer on the last command line argument
    -b     time 300 full parses of the last command line argument
    (none) parse every file given and print its header and questions;
           arguments that are not files, and files called Makefile, are
           skipped.
    """
    import getopt
    import time
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'bl', [])
    except getopt.GetoptError:
        print("-b for benchmark")
        print("-l test the lexer")
        sys.exit()
    do_benchmark = 0
    test_what = 'scanner'
    for opt, val in opts:
        if opt == '-b':
            do_benchmark = 1
        if opt == '-l':
            test_what = 'lexer'
    # Exactly one of the three modes runs.  A benchmark does not fall
    # through to the normal parse-and-print mode.
    if test_what == 'lexer':
        t1 = time.clock()
        for x in xrange(300):
            f = open(sys.argv[-1], 'r')
            try:
                src = f.read()
            finally:
                f.close()
            Lexer(src)
        print(time.clock() - t1)
    elif do_benchmark:
        t1 = time.clock()
        for x in xrange(300):
            p = _make_test_parser()
            p.parse_file(sys.argv[-1])
        print(time.clock() - t1)
    else:
        print(sys.argv)
        # Use the arguments left over by getopt, not the raw argv.
        for fn in args:
            if (not os.path.isfile(fn)) or (os.path.basename(fn) == "Makefile"):
                continue
            p = _make_test_parser()
            print(fn)
            p.parse_file(fn)
            print("header %s" % (p.header,))
            print("questions %s" % (p.questions,))

# Run main() when the module is executed as a script.
if __name__ == '__main__':
    main()
    sys.exit()
    # NOTE: the statements below are never reached because of the
    # sys.exit() call above.  Presumably they are kept so that main() can
    # be profiled by removing the two lines above -- confirm before
    # deleting them.
    import profile, pstats
    profile.run("main()", "profile.txt")
    s = pstats.Stats("profile.txt")
    s.strip_dirs().sort_stats('cumulative').print_stats(100)
    #print s.print_callers(.5, "peek_type")
