unicode results from the CPAN

unicode

Alien-SVN

view release on metacpan or search on metacpan

src/subversion/build/generator/ezt.py view on Meta::CPAN

   The [is ...] directive is similar to the other conditional
   directives above.  But it allows to compare two value references or
   a value reference with some constant string.

   [define VARIABLE] ... [end]

   The [define ...] directive allows you to create and modify template
   variables from within the template itself.  Essentially, any data
   between inside the [define ...] and its matching [end] will be
   expanded using the other template parsing and output generation
   rules, and then stored as a string value assigned to the variable
   VARIABLE.  The new (or changed) variable is then available for use
   with other mechanisms such as [is ...] or [if-any ...], as long as
   they appear later in the template.

   [format "html|xml|js|url|raw"] ... [end]

   The [format ...] directive creates a block in which any substitutions
   are processed as though the template has been instantiated with the
   the corresponding 'base_format' argument. Comma-separated format
   specifiers perform nested encodings. In this case the encodings are
   applied left-to-right.  For example the directive: [format "html,js"]
   will HTML and then Javascript encode any inserted template variables.
"""
#
# Copyright (C) 2001-2009 Greg Stein. All Rights Reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
#   notice, this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright
#   notice, this list of conditions and the following disclaimer in the
#   documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
#
# This software is maintained by Greg and is available at:
#    http://code.google.com/p/ezt/
#

import os, re, sys

if sys.version_info[0] >= 3:
  # Python >=3.0
  long = int
  unicode = str
  from io import StringIO
  from urllib.parse import quote_plus as urllib_parse_quote_plus
else:
  # Python <3.0
  from urllib import quote_plus as urllib_parse_quote_plus
  try:
    from cStringIO import StringIO
  except ImportError:
    from StringIO import StringIO

#
# Formatting types
#
FORMAT_RAW = 'raw'
FORMAT_HTML = 'html'
FORMAT_XML = 'xml'
FORMAT_JS = 'js'
FORMAT_URL = 'url'

#
# This regular expression matches three alternatives:
#   expr: NEWLINE | DIRECTIVE | BRACKET | COMMENT
#   DIRECTIVE: '[' ITEM (whitespace ITEM)* ']
#   ITEM: STRING | NAME
#   STRING: '"' (not-slash-or-dquote | '\' anychar)* '"'
#   NAME: (alphanum | '_' | '-' | '.')+
#   BRACKET: '[[]'
#   COMMENT: '[#' not-rbracket* ']'
#
# When used with the split() method, the return value will be composed of
# non-matching text and the three paren groups (NEWLINE, DIRECTIVE and
# BRACKET). Since the COMMENT matches are not placed into a group, they are
# considered a "splitting" value and simply dropped.
#
_item = r'(?:"(?:[^\\"]|\\.)*"|[-\w.]+)'
_re_parse = re.compile(r'(\r?\n)|\[(%s(?: +%s)*)\]|(\[\[\])|\[#[^\]]*\]' %
                       (_item, _item))

_re_args = re.compile(r'"(?:[^\\"]|\\.)*"|[-\w.]+')

# block commands and their argument counts
_block_cmd_specs = { 'if-index':2, 'for':1, 'is':2, 'define':1, 'format':1 }
_block_cmds = _block_cmd_specs.keys()

# two regular expressions for compressing whitespace. the first is used to
# compress any whitespace including a newline into a single newline. the
# second regex is used to compress runs of whitespace into a single space.
_re_newline = re.compile('[ \t\r\f\v]*\n\\s*')
_re_whitespace = re.compile(r'\s\s+')

# this regex is used to substitute arguments into a value. we split the value,
# replace the relevant pieces, and then put it all back together. splitting
# will produce a list of: TEXT ( splitter TEXT )*. splitter will be '%' or
# an integer.
_re_subst = re.compile('%(%|[0-9]+)')

class Template:

  def __init__(self, fname=None, compress_whitespace=1,
               base_format=FORMAT_RAW):

src/subversion/build/generator/ezt.py view on Meta::CPAN

  if for_names:
    # From last to first part, check if this reference is part of a for loop
    for i in range(len(parts), 0, -1):
      name = '.'.join(parts[:i])
      if name in for_names:
        return refname, name, parts[i:]

  return refname, start, rest

def _get_value(refname_start_rest, ctx, filename, line_number):
  """refname_start_rest -> a prepared `value reference' (see above).
  ctx -> an execution context instance.

  Does a name space lookup within the template name space.  Active
  for blocks take precedence over data dictionary members with the
  same name.
  """
  (refname, start, rest) = refname_start_rest
  if rest is None:
    # it was a string constant
    return start

  # get the starting object
  if start in ctx.for_index:
    list, idx = ctx.for_index[start]
    ob = list[idx]
  elif start in ctx.defines:
    ob = ctx.defines[start]
  elif hasattr(ctx.data, start):
    ob = getattr(ctx.data, start)
  else:
    raise UnknownReference(refname, filename, line_number)

  # walk the rest of the dotted reference
  for attr in rest:
    try:
      ob = getattr(ob, attr)
    except AttributeError:
      raise UnknownReference(refname, filename, line_number)

  # make sure we return a string instead of some various Python types
  if isinstance(ob, (int, long, float)):
    return str(ob)
  if ob is None:
    return ''

  # string or a sequence
  return ob

def _replace(s, replace_map):
  for orig, repl in replace_map:
    s = s.replace(orig, repl)
  return s

REPLACE_JS_MAP = (
  ('\\', r'\\'), ('\t', r'\t'), ('\n', r'\n'), ('\r', r'\r'),
  ('"', r'\x22'), ('\'', r'\x27'), ('&', r'\x26'),
  ('<', r'\x3c'), ('>', r'\x3e'), ('=', r'\x3d'),
)

# Various unicode whitespace
if sys.version_info[0] >= 3:
  # Python >=3.0
  REPLACE_JS_UNICODE_MAP = (
    ('\u0085', r'\u0085'), ('\u2028', r'\u2028'), ('\u2029', r'\u2029')
  )
else:
  # Python <3.0
  REPLACE_JS_UNICODE_MAP = eval("((u'\u0085', r'\u0085'), (u'\u2028', r'\u2028'), (u'\u2029', r'\u2029'))")

# Why not cgi.escape? It doesn't do single quotes which are occasionally
# used to contain HTML attributes and event handler definitions (unfortunately)
REPLACE_HTML_MAP = (
  ('&', '&amp;'), ('<', '&lt;'), ('>', '&gt;'),
  ('"', '&quot;'), ('\'', '&#39;'),
)

def _js_escape(s):
  s = _replace(s, REPLACE_JS_MAP)
  ### perhaps attempt to coerce the string to unicode and then replace?
  if isinstance(s, unicode):
    s = _replace(s, REPLACE_JS_UNICODE_MAP)
  return s

def _html_escape(s):
  return _replace(s, REPLACE_HTML_MAP)

def _url_escape(s):
  ### quote_plus barfs on non-ASCII characters. According to
  ### http://www.w3.org/International/O-URL-code.html URIs should be
  ### UTF-8 encoded first.
  if isinstance(s, unicode):
    s = s.encode('utf8')
  return urllib_parse_quote_plus(s)

FORMATTERS = {
  FORMAT_RAW: None,
  FORMAT_HTML: _html_escape,
  FORMAT_XML: _html_escape,   ### use the same quoting as HTML for now
  FORMAT_JS: _js_escape,
  FORMAT_URL: _url_escape,
}

def _parse_format(format_string=FORMAT_RAW):
  format_funcs = []
  try:
    for fspec in format_string.split(','):
      format_func = FORMATTERS[fspec]
      if format_func is not None:
        format_funcs.append(format_func)
  except KeyError:
    raise UnknownFormatConstantError(format_string)
  return format_funcs

class _context:
  """A container for the execution context"""


class Reader:
  """Abstract class which allows EZT to detect Reader objects."""
  def filename(self):
    return '(%s does not provide filename() method)' % repr(self)

class _FileReader(Reader):
  """Reads templates from the filesystem."""
  def __init__(self, fname):
    self.text = open(fname, 'rb').read()
    if sys.version_info[0] >= 3:
      # Python >=3.0
      self.text = self.text.decode()
    self._dir = os.path.dirname(fname)
    self.fname = fname
  def read_other(self, relative):
    return _FileReader(os.path.join(self._dir, relative))
  def filename(self):
    return self.fname

class _TextReader(Reader):
  """'Reads' a template from provided text."""
  def __init__(self, text):
    self.text = text
  def read_other(self, relative):
    raise BaseUnavailableError()
  def filename(self):
    return '(text)'


class EZTException(Exception):
  """Parent class of all EZT exceptions."""
  def __init__(self, message=None, filename=None, line_number=None):
    self.message = message
    self.filename = filename

( run in 3.835 seconds using v1.01-cache-2.11-cpan-13bb782fe5a )