#!/usr/bin/python2.2
# 
# Copyright 2002, 2003 Zuza Software Foundation
# 
# This file is part of mozpotools.
#
# mozpotools is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# 
# mozpotools is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with mozpotools; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

"""string processing utilities for extracting strings with various kinds of delimiters"""

def extract(source,startdelim,enddelim,escape,startinstring=0):
  """Extracts the delimited sections of source, keeping the delimiters in the result.

  source is scanned one character at a time. Everything from a startdelim up to
  and including the matching enddelim is collected; text outside the delimiters
  is dropped. While inside a section, the character following an occurrence of
  escape is always taken literally, so an escaped enddelim does not close the
  section. Pass escape=None to disable escaping altogether.

  startinstring says whether the scan starts already inside a section (for
  example when continuing from a previous line).

  Returns a tuple (extracted, instring), where instring is true if source ended
  inside a section that has not been closed yet.
  """
  # note that this returns the delimiters as well... even internally
  instring = startinstring
  inescape = 0
  # position before which no end delimiter may start: either just past the last
  # start delimiter or just past the last escaped character, whichever is later
  lastspecial = 0
  lenstart = len(startdelim)
  lenend = len(enddelim)
  result = ""
  for pos in range(len(source)):
    c = source[pos]
    if instring:
      if inescape:
        # the previous character was the escape, so this one is literal
        result += c
        lastspecial = pos+1
      elif (pos-lenend < lastspecial) or not source.startswith(enddelim,pos-lenend):
        # no complete end delimiter finishes just before pos: still inside
        result += c
      else:
        # an end delimiter finished just before pos, so we are now outside.
        # the character at pos itself is skipped (not even checked for a start)
        instring = not instring
    elif source.startswith(startdelim,pos) and (not inescape):
      # a start delimiter begins here: its remaining characters are picked up
      # by the "still inside" branch on the following iterations
      instring = not instring
      lastspecial = pos + lenstart
      result += c
    # an escape only counts if it is not itself escaped
    if (escape is not None) and source.startswith(escape,pos) and (not inescape):
      inescape = 1
    else:
      inescape = 0
  # the loop only notices an end delimiter one character later, so check
  # whether the source finishes exactly on one. use lastspecial (not the start
  # position) so that an escaped end delimiter at the very end does not close it
  pos = len(source)
  if instring and (pos-lenend >= lastspecial) and source.startswith(enddelim,pos-lenend) and (not inescape):
    instring = not instring
  return (result,instring)

def extractfromlines(lines,startdelim,enddelim,escape):
  """Runs extract over successive lines, carrying the in-string state across them.

  The extracted pieces are concatenated. Processing stops after the first line
  that finishes outside a delimited section.
  """
  pieces = []
  stillinside = 0
  for line in lines:
    piece, stillinside = extract(line,startdelim,enddelim,escape,stillinside)
    pieces.append(piece)
    if not stillinside:
      # this line ended outside a section, so later lines are not looked at
      break
  return "".join(pieces)

def extractstr(source):
  """Returns the doublequote-delimited parts of source (quotes included), honouring backslash escapes"""
  # only the extracted text is wanted; the "still in string" flag is discarded
  return extract(source,'"','"','\\')[0]

def extractcomment(lines):
  """Extracts an XML comment (<!-- ... -->) from lines, with no escape character"""
  commentstart = "<!--"
  commentend = "-->"
  return extractfromlines(lines,commentstart,commentend,None)

def extractwithoutquotes(source,startdelim,enddelim,escape,startinstring=0,includeescapes=1):
  """Extracts the delimited sections of source, leaving the delimiters out of the result.

  source is scanned one character at a time. The text between each startdelim
  and its matching enddelim is collected; the delimiters themselves and any
  text outside them are dropped. While inside a section, the character
  following an occurrence of escape is always taken literally, so an escaped
  enddelim does not close the section. Pass escape=None to disable escaping.

  startinstring says whether the scan starts already inside a section (for
  example when continuing from a previous line). If includeescapes is false,
  the escape itself is removed from the result and only the escaped character
  is kept.

  Returns a tuple (extracted, instring), where instring is true if source ended
  inside a section that has not been closed yet.
  """
  # note that this doesn't return the delimiters...
  instring = startinstring
  inescape = 0
  # position before which no end delimiter may start: either just past the last
  # start delimiter or just past the last escaped character, whichever is later
  lastspecial = 0
  lenstart = len(startdelim)
  lenend = len(enddelim)
  # index in result of the start delimiter of the section currently open,
  # or None if that section was already open when the scan began
  laststartinresultstr = None
  result = ""
  for pos in range(len(source)):
    c = source[pos]
    if instring:
      if inescape:
        # if not including escapes in result, take the escape back out
        if not includeescapes:
          result = result[:-len(escape)]
        # the previous character was the escape, so this one is literal
        result += c
        lastspecial = pos+1
      elif (pos-lenend < lastspecial) or not source.startswith(enddelim,pos-lenend):
        # no complete end delimiter finishes just before pos: still inside.
        # delimiters are added here too and cut out again once the end is seen
        result += c
      else:
        # an end delimiter finished just before pos, so we are now outside.
        # the character at pos itself is skipped (not even checked for a start)
        instring = not instring
        # remove the start delimiter from the result, if this section was
        # opened in this source (it was not when startinstring was set)
        if laststartinresultstr is not None:
          result = result[:laststartinresultstr] + result[laststartinresultstr+lenstart:]
          laststartinresultstr = None
        # remove the end delimiter from the result
        result = result[:-lenend]
    elif source.startswith(startdelim,pos) and (not inescape):
      # a start delimiter begins here: remember where it lands in the result
      instring = not instring
      lastspecial = pos + lenstart
      laststartinresultstr = len(result)
      result += c
    # an escape only counts if it is not itself escaped
    if (escape is not None) and source.startswith(escape,pos) and (not inescape):
      inescape = 1
    else:
      inescape = 0

  # take out the start delimiter of a section that is still open
  if laststartinresultstr is not None:
    result = result[:laststartinresultstr] + result[laststartinresultstr+lenstart:]

  # the loop only notices an end delimiter one character later, so check
  # whether the source finishes exactly on one. use lastspecial (not the start
  # position) so that an escaped end delimiter at the very end does not close it
  pos = len(source)
  if instring and (pos-lenend >= lastspecial) and source.startswith(enddelim,pos-lenend) and (not inescape):
    instring = not instring
    # and remember to remove it
    result = result[:-lenend]

  return (result,instring)

def escapequotes(source):
  """Returns source with a backslash inserted before every double quote"""
  # splitting on the quote and rejoining with the escaped form replaces each occurrence
  pieces = source.split('"')
  return '\\"'.join(pieces)

def escapesinglequotes(source):
  """Returns source with every single quote doubled"""
  singlequote = "'"
  doubled = singlequote + singlequote
  return source.replace(singlequote,doubled)

def escapeunicode(str):
  """Returns str with every backslash-u sequence given an extra leading backslash"""
  pieces = str.split('\\u')
  return '\\\\u'.join(pieces)

def unescapeunicode(str):
  """Returns str with every doubled-backslash-u sequence reduced to a single backslash-u"""
  pieces = str.split('\\\\u')
  return '\\u'.join(pieces)

def quotestr(source):
  """Returns source wrapped in double quotes, with the double quotes inside it backslash-escaped"""
  escaped = escapequotes(source)
  return "".join(['"', escaped, '"'])

def singlequotestr(source):
  """Returns source wrapped in single quotes, with the single quotes inside it doubled"""
  escaped = escapesinglequotes(source)
  return "".join(["'", escaped, "'"])

def eitherquotestr(source):
  """Returns source wrapped in single quotes if it contains a double quote, otherwise in double quotes.

  The contents are not escaped in either case.
  """
  quote = '"'
  if source.find('"') != -1:
    # a double quote inside would clash with the delimiter, so use single quotes
    quote = "'"
  return quote + source + quote

def findend(string,substring):
  """Returns the index just past the first occurrence of substring in string, or -1 if it is absent"""
  s = string.find(substring)
  if s != -1:
    # move from the start of the match to the position right after it
    s += len(substring)
  return s

def rstripeol(string):
  """Returns string without any trailing newline or carriage return characters"""
  eolchars = ('\n','\r')
  end = len(string)
  # walk backwards until a character that is not an end-of-line marker
  while end > 0:
    if string[end-1] not in eolchars:
      break
    end -= 1
  return string[:end]

def stripcomment(comment,startstring="<!--",endstring="-->"):
  """Returns the text between startstring and endstring in comment, stripped of surrounding whitespace.

  If startstring is not found the text is taken from the beginning of comment;
  if endstring is not found after it the text runs to the end of comment.
  """
  cstart = comment.find(startstring)
  if cstart == -1:
    # no start marker: keep everything from the beginning
    cstart = 0
  else:
    cstart += len(startstring)
  cend = comment.find(endstring,cstart)
  if cend == -1:
    # no end marker: keep everything to the end, rather than slicing with -1
    # (which would silently drop the last character)
    cend = len(comment)
  return comment[cstart:cend].strip()

def unstripcomment(comment,startstring="<!-- ",endstring=" -->\n"):
  """Returns comment, stripped of surrounding whitespace, wrapped in startstring and endstring"""
  body = comment.strip()
  wrapped = startstring + body
  return wrapped + endstring

def testcase():
  """Prints the results of a few sample extractions, as a quick manual check"""
  x = ' "this" " is " "a" " test!" '
  # print(...) with a single argument gives the same output whether print is
  # a statement or a function
  print(extract(x,'"','"',None))
  print(extract(x,'"','"','!'))
  print(extractwithoutquotes(x,'"','"',None))
  print(extractwithoutquotes(x,'"','"','!'))
  print(extractwithoutquotes(x,'"','"','!',includeescapes=0))

# run the sample extractions when this file is executed directly as a script
if __name__ == '__main__':
  testcase()

