diff -Nru simpleparse-2.1.0a1/baseparser.py simpleparse-2.2.0/baseparser.py --- simpleparse-2.1.0a1/baseparser.py 2006-02-19 00:38:27.000000000 +0000 +++ simpleparse-2.2.0/baseparser.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,66 +0,0 @@ -"""Base class for real-world parsers (such as parser.Parser)""" -from simpleparse.stt.TextTools.TextTools import * -from simpleparse.generator import Generator - -class BaseParser: - """Class on which real-world parsers build - - Normally you use a sub-class of this class, such as - simpleparser.parser.Parser - """ - _rootProduction = "" - # primary API... - def parse( self, data, production=None, processor=None, start=0, stop=None): - """Parse data with production "production" of this parser - - data -- data to be parsed, a Python string, for now - production -- optional string specifying a non-default production to use - for parsing data - processor -- optional pointer to a Processor or MethodSource object for - use in determining reporting format and/or post-processing the results - of the parsing pass. Can be None if neither is desired (default) - start -- starting index for the parsing, default 0 - stop -- stoping index for the parsing, default len(data) - """ - self.resetBeforeParse() - if processor is None: - processor = self.buildProcessor() - if stop is None: - stop = len(data) - value = tag( data, self.buildTagger( production, processor), start, stop ) - if processor and callable(processor): - return processor( value, data ) - else: - return value - # abstract methods - def buildProcessor( self ): - """Build default processor object for this parser class - - The default implementation returns None. The processor - can either implement the "method source" API (just provides - information about Callouts and the like), or the processor - API and the method-source API. The processor API merely - requires that the object be callable, and have the signature: - - object( (success, children, nextPosition), buffer) - - (Note: your object can treat the first item as a single tuple - if it likes). - - See: simpleparse.processor module for details. - """ - return None - def buildTagger( self, name, processor ): - """Build the tag-table for the parser - - This method must be implemented by your base class and _not_ - call the implementation here. - """ - raise NotImplementedError( """Parser sub-class %s hasn't implemented a buildTagger method"""%(self.__class__.__name__)) - def resetBeforeParse( self ): - """Called just before the parser's parse method starts working, - - Allows you to set up special-purpose structures, such as stacks - or local storage values. There is no base implementation. The - base implementation does nothing. - """ diff -Nru simpleparse-2.1.0a1/common/calendar_names.py simpleparse-2.2.0/common/calendar_names.py --- simpleparse-2.1.0a1/common/calendar_names.py 2002-07-04 11:28:00.000000000 +0000 +++ simpleparse-2.2.0/common/calendar_names.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,102 +0,0 @@ -"""Locale-specific calendar names (day-of-week and month-of-year) - -These values are those returned by the calendar module. Available -productions: - - locale_day_names - locale_day_names_uc - locale_day_names_lc - Names for the days of the week - - locale_day_abbrs - locale_day_abbrs_uc - locale_day_abbrs_lc - Short-forms (3 characters normally) for - the days of the week. 
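Note: the parse() API documented in the removed baseparser.py above is the one still exposed by simpleparse.parser.Parser. A minimal, hedged usage sketch; the grammar and names below are illustrative, not taken from this diff:

    from simpleparse.parser import Parser

    decl = r'''
    word    := letters
    letters := [a-zA-Z]+
    '''
    parser = Parser(decl, "word")   # "word" is the default root production
    # parse() returns (success, children, next) exactly as described above:
    # success is 0/1, children is the result tree, next is where parsing stopped.
    success, children, next_char = parser.parse("Hello world")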
- - locale_month_names - locale_month_names_uc - locale_month_names_lc - Names for the months of the year - - locale_month_abbrs - locale_month_abbrs_uc - locale_month_abbrs_lc - Short-forms (3 characters normally) for - the months of the year - -Interpreters: - MonthNameInterpreter - DayNameInterpreter - Both offer the ability to set an index other - than the default (of 1) for the first item in - the list. -""" -import calendar, string -from simpleparse import objectgenerator, common - -c = {} - -da = calendar.day_abbr[:] -dn = calendar.day_name[:] -ma = calendar.month_abbr[:] -mn = calendar.month_name[:] - -def _build( name, set ): - # make sure longest equal-prefix items are first - set = set[:] - set.sort() - set.reverse() - l,u,r = [],[],[] - for item in set: - l.append( objectgenerator.Literal( value = string.lower(item) )) - u.append( objectgenerator.Literal( value = string.upper(item) )) - r.append( objectgenerator.Literal( value = item )) - c[ name + '_lc' ] = objectgenerator.FirstOfGroup( children = l ) - c[ name + '_uc' ] = objectgenerator.FirstOfGroup( children = u ) - c[ name ] = objectgenerator.FirstOfGroup( children = r ) - -_build( 'locale_day_names', dn ) -_build( 'locale_day_abbrs', da ) - - -_build( 'locale_month_names', mn ) -_build( 'locale_month_abbrs', ma ) - -da = map(string.lower, da ) -dn = map(string.lower, dn ) -ma = map(string.lower, ma ) -mn = map(string.lower, mn ) - - -common.share( c ) - -class NameInterpreter: - offset = 1 - def __init__( self, offset = 1 ): - self.offset = offset - def __call__( self, (tag, left, right, children), buffer ): - value = string.lower( buffer[left:right] ) - for table in self.tables: - try: - return table.index( value )+ self.offset - except ValueError: - pass - raise ValueError( """Unrecognised (but parsed) %s name %s at character %s"""%( self.nameType, value, left)) - -class MonthNameInterpreter( NameInterpreter): - """Interpret a month-of-year name as an integer index - - Pass an "offset" value to __init__ to use an offset other - than 1 (Monday = 1), normally 0 (Monday = 0) - """ - nameType = "Month" - tables = (mn,ma) -class DayNameInterpreter( NameInterpreter ): - """Interpret a day-of-week name as an integer index - - Pass an "offset" value to __init__ to use an offset other - than 1 (January = 1), normally 0 (January = 0) - """ - nameType = "Day" - tables = (dn,da) diff -Nru simpleparse-2.1.0a1/common/chartypes.py simpleparse-2.2.0/common/chartypes.py --- simpleparse-2.1.0a1/common/chartypes.py 2006-02-18 23:11:15.000000000 +0000 +++ simpleparse-2.2.0/common/chartypes.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,83 +0,0 @@ -"""Common locale-specific character types - -Following productions are all based on string module, -with the default locale specified. 
The first production -is a single character of the class and the second a -repeating character version: - - digit, digits - uppercasechar, uppercase - lowercasechar, lowercase - letter, letters - whitespacechar, whitespace - punctuationchar, punctuation - octdigit, octdigits - hexdigit, hexdigits - printablechar, printable - -For Python versions with the constants in the string module: - ascii_letter, ascii_letters - ascii_lowercasechar, ascii_lowercase - ascii_uppercasechar, ascii_uppercase - - -Following are locale-specific values, both are -single-character values: - - locale_decimal_point -- locale-specific decimal seperator - locale_thousands_seperator -- locale-specific "thousands" seperator - -Others: - - EOF -- Matches iff parsing has reached the end of the buffer - -There are no interpreters provided (the types are considered -too common to provide meaningful interpreters). -""" -from simpleparse import objectgenerator, common -import string, locale -locale.setlocale(locale.LC_ALL, "" ) - -c = {} - -# string-module items... - -for source,single,repeat in [ - ("digits","digit","digits"), - ("uppercase", "uppercasechar", "uppercase"), - ("lowercase", "lowercasechar", "lowercase"), - ("letters", "letter", "letters" ), - ("ascii_lowercase", "ascii_lowercasechar", "ascii_lowercase"), - ("ascii_uppercase", "ascii_uppercasechar", "ascii_uppercase"), - ("ascii_letters", "ascii_letter", "ascii_letters" ), - ("whitespace", "whitespacechar", "whitespace"), - ("punctuation", "punctuationchar", "punctuation"), - ("octdigits", "octdigit", "octdigits"), - ("hexdigits", "hexdigit", "hexdigits"), - ("printable", "printablechar", "printable"), -]: - try: - value = getattr( string, source ) - c[ single ] = objectgenerator.Range( value = value ) - c[ repeat ] = objectgenerator.Range( value = value, repeating =1 ) - except AttributeError: - pass - -# locale-module items -_lc = locale.localeconv() -c[ "locale_decimal_point" ] = objectgenerator.Literal( value = _lc["decimal_point"] ) -c[ "locale_thousands_seperator" ] = objectgenerator.Literal( value = _lc["thousands_sep"] ) - -del _lc - -# common, but not really well defined sets -# this is the set of characters which are interpreted -# specially by Python's string-escaping when they -# follow a \\ char. - -from simpleparse.stt import TextTools -c[ "EOF" ] = objectgenerator.Prebuilt( value = ( - (None, TextTools.EOF, TextTools.Here), -) ) - -common.share( c ) diff -Nru simpleparse-2.1.0a1/common/comments.py simpleparse-2.2.0/common/comments.py --- simpleparse-2.1.0a1/common/comments.py 2006-02-19 00:29:53.000000000 +0000 +++ simpleparse-2.2.0/common/comments.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,73 +0,0 @@ -"""Common comment formats - -To process, handle the "comment" production, -(the specific named comment formats are all -expanded productions, so you won't get them -returned for processing). 
- - hash_comment - # to EOL comments - slashslash_comment - // to EOL comments - semicolon_comment - ; to EOL comments - slashbang_comment - c_comment - non-nesting /* */ comments - slashbang_nest_comment - c_nest_comment - nesting /* /* */ */ comments -""" -from simpleparse.parser import Parser -from simpleparse import common, objectgenerator -from simpleparse.common import chartypes - -c = {} - -eolcomments = r""" -### comment formats where the comment goes -### from a marker to the end of the line - -comment := -'\012'* - := ('\r'?,'\n')/EOF - ->hash_comment< := '#', comment, EOL ->semicolon_comment< := ';', comment, EOL ->slashslash_comment< := '//', comment, EOL -""" - -_p = Parser( eolcomments ) -for name in ["hash_comment", "semicolon_comment", "slashslash_comment"]: - c[ name ] = objectgenerator.LibraryElement( - generator = _p._generator, - production = name, - ) - -ccomments = r""" -### comments in format /* comment */ with no recursion allowed -comment := -"*/"* ->slashbang_comment< := '/*', comment, '*/' -""" -_p = Parser( ccomments ) -for name in ["c_comment","slashbang_comment"]: - c[ name ] = objectgenerator.LibraryElement( - generator = _p._generator, - production = "slashbang_comment", - ) - -nccomments = r""" -### nestable C comments of form /* comment /* innercomment */ back to previous */ - := '/*' - := '*/' -comment := (-(comment_stop/comment_start)+/slashbang_nest_comment)* ->slashbang_nest_comment< := comment_start, comment, comment_stop -""" -_p = Parser( nccomments ) -for name in ["c_nest_comment","slashbang_nest_comment"]: - c[ name ] = objectgenerator.LibraryElement( - generator = _p._generator, - production = "slashbang_nest_comment", - ) - -common.share(c) - diff -Nru simpleparse-2.1.0a1/common/__init__.py simpleparse-2.2.0/common/__init__.py --- simpleparse-2.1.0a1/common/__init__.py 2002-07-02 09:23:44.000000000 +0000 +++ simpleparse-2.2.0/common/__init__.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,19 +0,0 @@ -"""Common (library) definitions - -You normally use this module by importing one of our -sub-modules (which automatically registers itself with -the SOURCES list defined here). - -Calling common.share( dictionary ) with a dictionary -mapping string names to element token instances will -make the element tokens available under those string -names in default parsers. Note: a Parser can override -this by specifying an explicit definitionSources -parameter in its initialiser. 
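Note: the comment productions above are published through common.share(), so importing simpleparse.common.comments is enough to make them available to a default Parser. A hedged sketch of that pattern; the grammar itself is invented for illustration:

    from simpleparse.parser import Parser
    from simpleparse.common import comments   # importing registers hash_comment and friends via common.share()

    decl = r'''
    config := (hash_comment / line)*
    line   := -[#\n]+, '\n'?
    '''
    parser = Parser(decl, "config")
    success, children, next_char = parser.parse("# a comment\nvalue=1\n")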
-""" - -def share( dictionary ): - SOURCES.append( dictionary) - -SOURCES = [ -] \ No newline at end of file diff -Nru simpleparse-2.1.0a1/common/iso_date_loose.py simpleparse-2.2.0/common/iso_date_loose.py --- simpleparse-2.1.0a1/common/iso_date_loose.py 2002-07-10 05:14:48.000000000 +0000 +++ simpleparse-2.2.0/common/iso_date_loose.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,136 +0,0 @@ -"""Somewhat Looser ISO date format YYYY-MM-DD HH:mm:SS +HH:mm - - ISO_date_loose -- YYYY-MM-DD format, with a month and day optional, - month or day may be specified without leading 0 - ISO_time_loose -- HH:mm:SS format, with minutes and seconds optional - all numbers may be specified without leading 0 - ISO_date_time_loose -- YYYY-MM-DD HH:mm:SS +HH:mm format, - with time optional and TimeZone offset optional, - same format for date and time as above - -Interpreter: - MxInterpreter - Interprets the parse tree as mx.DateTime values - Date and DateTime -> DateTime objects - Time only -> RelativeDateTime -""" -try: - from mx import DateTime - haveMX = 1 -except ImportError: - haveMX = 0 -from simpleparse.parser import Parser -from simpleparse import common, objectgenerator -from simpleparse.common import chartypes, numbers -from simpleparse.dispatchprocessor import * - -c = {} -declaration = """ - := [-] - := ':' -offset_sign := [-+] - -year := int -month := int -day := int -hour := int -minute := int -second := float/int -ISO_date_loose := year, (date_separator, month, (date_separator, day)?)? -ISO_time_loose := hour, (time_separator, minute, (time_separator, second)?)? -offset := offset_sign, offset_hour, time_separator?, offset_minute? -offset_hour := digit, digit -offset_minute := digit, digit - -ISO_date_time_loose := ISO_date_loose, ([T ], ISO_time_loose)?, [ ]?, offset? -""" - -_p = Parser( declaration ) -for name in ["ISO_time_loose","ISO_date_time_loose", "ISO_date_loose"]: - c[ name ] = objectgenerator.LibraryElement( - generator = _p._generator, - production = name, - ) -common.share( c ) - -if haveMX: - class MxInterpreter(DispatchProcessor): - """Interpret a parsed ISO_date_time_loose in GMT/UTC time or localtime - """ - int = numbers.IntInterpreter() - offset_minute = offset_hour = year = month = day = hour = minute = int - - float = numbers.FloatInterpreter() - second = float - - def __init__( - self, - inputLocal = 1, - returnLocal = 1, - ): - self.inputLocal = inputLocal - self.returnLocal = returnLocal - dateName = 'ISO_date_loose' - timeName = 'ISO_time_loose' - def ISO_date_time_loose( self, (tag, left, right, sublist), buffer): - """Interpret the loose ISO date + time format""" - set = singleMap( sublist, self, buffer ) - base, time, offset = ( - set.get(self.dateName), - set.get(self.timeName) or DateTime.RelativeDateTime(hour=0,minute=0,second=0), - set.get( "offset" ), - ) - base = base + time - offset = set.get( "offset" ) - if offset is not None: - # an explicit timezone was entered, convert to gmt and return as appropriate... - gmt = base - offset - if self.returnLocal: - return gmt.localtime() - else: - return gmt - # was in the default input locale (either gmt or local) - if self.inputLocal and self.returnLocal: - return base - elif not self.inputLocal and not self.returnLocal: - return base - elif self.inputLocal and not self.returnLocal: - # return gmt from local... 
- return base.gmtime() - else: - return base.localtime() - def ISO_date_loose( self, (tag, left, right, sublist), buffer): - """Interpret the loose ISO date format""" - set = singleMap( sublist, self, buffer ) - return DateTime.DateTime( - set.get("year") or now().year, - set.get("month") or 1, - set.get("day") or 1, - ) - def ISO_time_loose( self, (tag, left, right, sublist), buffer): - """Interpret the loose ISO time format""" - set = singleMap( sublist, self, buffer ) - return DateTime.RelativeDateTime( - hour = set.get("hour") or 0, - minute = set.get("minute") or 0, - second = set.get("second") or 0, - ) - - - def offset( self, (tag, left, right, sublist), buffer): - """Calculate the time zone offset as a date-time delta""" - set = singleMap( sublist, self, buffer ) - direction = set.get('offset_sign',1) - hour = set.get( "offset_hour", 0) - minute = set.get( "offset_minute", 0) - delta = DateTime.DateTimeDelta( 0, hour*direction, minute*direction) - return delta - - def offset_sign( self , (tag, left, right, sublist), buffer): - """Interpret the offset sign as a multiplier""" - v = buffer [left: right] - if v in ' +': - return 1 - else: - return -1 - diff -Nru simpleparse-2.1.0a1/common/iso_date.py simpleparse-2.2.0/common/iso_date.py --- simpleparse-2.1.0a1/common/iso_date.py 2002-08-06 03:31:39.000000000 +0000 +++ simpleparse-2.2.0/common/iso_date.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,148 +0,0 @@ -"""Canonical ISO date format YYYY-MM-DDTHH:mm:SS+HH:mm - -This parser is _extremely_ strict, and the dates that match it, -though really easy to work with for the computer, are not particularly -readable. See the iso_date_loose module for a slightly relaxed -definition which allows the "T" character to be replaced by a -" " character, and allows a space before the timezone offset, as well -as allowing the integer values to use non-0-padded integers. - - - ISO_date -- YYYY-MM-DD format, with a month and date optional - ISO_time -- HH:mm:SS format, with minutes and seconds optional - ISO_date_time -- YYYY-MM-DD HH:mm:SS+HH:mm format, - with time optional and TimeZone offset optional - -Interpreter: - MxInterpreter - Interprets the parse tree as mx.DateTime values - ISO_date and ISO_time - returns DateTime objects - Time only - returns RelativeDateTime object which, when - added to a DateTime gives you the given time - within that day -""" -try: - from mx import DateTime - haveMX = 1 -except ImportError: - haveMX = 0 -from simpleparse.parser import Parser -from simpleparse import common, objectgenerator -from simpleparse.common import chartypes, numbers -from simpleparse.dispatchprocessor import * - -c = {} - -declaration =""" -year := digit,digit,digit,digit -month := digit,digit -day := digit,digit - -hour := digit,digit -minute := digit,digit -second := digit,digit -offset_sign := [-+] -offset := offset_sign, hour, time_separator?, minute - - := '-' - := ':' - -ISO_date := year, (date_separator, month, (date_separator, day)?)? -ISO_time := hour, (time_separator, minute, (time_separator, second)?)? -ISO_date_time := ISO_date, ([T], ISO_time)?, offset? 
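Note: a hedged illustration of the strict grammar above; the "timestamp" production and the sample input are invented, only the ISO_* productions come from this module:

    from simpleparse.parser import Parser
    from simpleparse.common import iso_date   # registers ISO_date, ISO_time, ISO_date_time

    parser = Parser("timestamp := ISO_date_time", "timestamp")
    # strict form: zero-padded fields, a literal 'T' separator, optional +HH:mm offset
    success, children, next_char = parser.parse("2016-12-25T10:39:11+01:00")

With the egenix mx.DateTime package installed, passing processor=iso_date.MxInterpreter() to parse() is intended to turn such a match directly into DateTime values, as the interpreter below describes.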
-""" - - - - -_p = Parser( declaration ) -for name in ["ISO_time","ISO_date", "ISO_date_time"]: - c[ name ] = objectgenerator.LibraryElement( - generator = _p._generator, - production = name, - ) -common.share( c ) - -if haveMX: - class MxInterpreter(DispatchProcessor): - """Interpret a parsed ISO_date_time_loose in GMT/UTC time or localtime - """ - def __init__( - self, - inputLocal = 1, - returnLocal = 1, - ): - self.inputLocal = inputLocal - self.returnLocal = returnLocal - dateName = 'ISO_date' - timeName = 'ISO_time' - def ISO_date_time( self, (tag, left, right, sublist), buffer): - """Interpret the loose ISO date + time format""" - set = singleMap( sublist, self, buffer ) - base, time, offset = ( - set.get(self.dateName), - set.get(self.timeName) or DateTime.RelativeDateTime(hour=0,minute=0,second=0), - set.get( "offset" ), - ) - base = base + time - offset = set.get( "offset" ) - if offset is not None: - # an explicit timezone was entered, convert to gmt and return as appropriate... - gmt = base - offset - if self.returnLocal: - return gmt.localtime() - else: - return gmt - # was in the default input locale (either gmt or local) - if self.inputLocal and self.returnLocal: - return base - elif not self.inputLocal and not self.returnLocal: - return base - elif self.inputLocal and not self.returnLocal: - # return gmt from local... - return base.gmtime() - else: - return base.localtime() - def ISO_date( self, (tag, left, right, sublist), buffer): - """Interpret the ISO date format""" - set = {} - for item in sublist: - set[ item[0] ] = dispatch( self, item, buffer) - return DateTime.DateTime( - set.get("year") or now().year, - set.get("month") or 1, - set.get("day") or 1, - ) - def ISO_time( self, (tag, left, right, sublist), buffer): - """Interpret the ISO time format""" - set = {} - for item in sublist: - set[ item[0] ] = dispatch( self, item, buffer) - return DateTime.RelativeDateTime( - hour = set.get("hour") or 0, - minute = set.get("minute") or 0, - second = set.get("second") or 0, - ) - - integer = numbers.IntInterpreter() - second = offset_minute = offset_hour = year = month = day = hour =minute =integer - - def offset( self, (tag, left, right, sublist), buffer): - """Calculate the time zone offset as a date-time delta""" - set = singleMap( sublist, self, buffer ) - direction = set.get('offset_sign',1) - hour = set.get( "hour", 0) - minute = set.get( "minute", 0) - delta = DateTime.DateTimeDelta( 0, hour*direction, minute*direction) - return delta - - def offset_sign( self , (tag, left, right, sublist), buffer): - """Interpret the offset sign as a multiplier""" - v = buffer [left: right] - if v in ' +': - return 1 - else: - return -1 - diff -Nru simpleparse-2.1.0a1/common/numbers.py simpleparse-2.2.0/common/numbers.py --- simpleparse-2.1.0a1/common/numbers.py 2002-08-08 20:54:29.000000000 +0000 +++ simpleparse-2.2.0/common/numbers.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,187 +0,0 @@ -"""Samples showing the parsing of common programming-language constructs - -numbers - integers - int - int_unsigned - - hexidecimal integers - hex - - floats (including exponents, requring a '.' 
in the literal) - float - floats, with optional integer-only exponents - float_floatexp - floats, with optional integer or float exponents - - imaginary_number - (float/int),[jJ] - - number - hex/float/int - number_full - binary_number/imaginary_number/hex/float/int - - binary_number - signed binary number - 1001001b or 1001001B bit-field format, - optional sign - can be used with number as (binary_number/number) - -Interpreters: - - IntInterpreter - int, int_unsigned - HexInterpreter - hex - FloatInterpreter - float - FloatFloatExpInterpreter - float_floatexp - BinaryInterpreter - binary_number - ImaginaryInterpreter - imaginary_number - -""" -from simpleparse.parser import Parser -from simpleparse import common, objectgenerator -from simpleparse.common import chartypes -from simpleparse.dispatchprocessor import * -import string - -c = {} - -declaration = r""" -# sample for parsing integer and float numbers -# including hexidecimal numbers in 0xFFF format -sign := [-+]+ - - := digits - := hexdigits - -decimal_fraction := '.',int_unsigned? - -# float which is explicitly a float, cannot be an integer -# because it includes a decimal point -explicit_base := sign?, ((int_unsigned, decimal_fraction) / decimal_fraction / (int_unsigned,'.')) - -exponent := int -exponent_loose := explicit_base/int - -float := explicit_base, ([eE],exponent)? -float_floatexp := explicit_base, ([eE],exponent_loose)? - -hex := sign?, '0', [xX], hexdigits -int_unsigned := l_digits -int := sign?, l_digits -binary_digits := [01]+ -binary_number := sign?, binary_digits,('b'/'B') - -imaginary_number := (float/int), [jJ] - -##number := binary_number/hex/float/int -number := hex/float/int -number_full := binary_number/imaginary_number/hex/float/int -""" - -_p = Parser( declaration ) -for name in ["int","hex", "int_unsigned", "number", "float", "binary_number", "float_floatexp", "imaginary_number", "number_full"]: - c[ name ] = objectgenerator.LibraryElement( - generator = _p._generator, - production = name, - ) - -if __name__ == "__main__": - test() - -common.share( c ) - -def _toInt( s, base ): - try: - return int( s, base) - except TypeError: - return string.atoi( s, base) -def _toLong( s, base ): - try: - return long( s, base) - except TypeError: - return string.atol( s, base) - -class IntInterpreter(DispatchProcessor): - """Interpret an integer (or unsigned integer) string as an integer""" - def __call__( self, (tag, left, right, children), buffer): - try: - return _toInt( buffer[left:right], 10) - except ValueError: - return _toLong( buffer[left:right], 10) -class HexInterpreter(DispatchProcessor): - """Interpret a hexidecimal integer string as an integer value""" - def __call__( self, (tag, left, right, children), buffer): - try: - return _toInt( buffer[left:right], 16) - except ValueError: - return _toLong( buffer[left:right], 16) - -class FloatFloatExpInterpreter(DispatchProcessor): - """Interpret a float string as an integer value - Note: we're allowing float exponentiation, which - gives you a nice way to write 2e.5 - """ - def __call__( self, (tag, left, right, children), buffer): - tag, l, r, _ = children[0] - base = float( buffer[l:r] ) - if len(children) > 1: - # figure out the exponent... 
- exp = children[1] - exp = buffer[ exp[1]:exp[2]] -## import pdb -## pdb.set_trace() - exp = float( exp ) - - base = base * (10** exp) - return base -class FloatInterpreter(DispatchProcessor): - """Interpret a standard float value as a float""" - def __call__( self, (tag, left, right, children), buffer): - return float( buffer[left:right]) - -import sys -if hasattr( sys,'version_info') and sys.version_info[:2] > (2,0): - class BinaryInterpreter(DispatchProcessor): - def __call__( self, (tag, left, right, children), buffer): - """Interpret a bitfield set as an integer""" - return _toInt( buffer[left:right-1], 2) -else: - class BinaryInterpreter(DispatchProcessor): - def __call__( self, (tag, left, right, children), buffer): - """Interpret a bitfield set as an integer, not sure this algo - is correct, will see I suppose""" - sign = 1 - if len(children) > 2: - s = children[0] - for schar in buffer[s[1]:s[2]]: - if schar == '-': - sign = sign * -1 - bits = buffer[children[1][1]:children[1][2]] - else: - bits = buffer[children[0][1]:children[0][2]] - value = 0 - for bit in bits: - value = (value << 1) - if bit == '1': - value = value + 1 - return value - -class ImaginaryInterpreter( DispatchProcessor ): - map = { - "float":FloatInterpreter(), - "int":IntInterpreter() - } - def __call__( self, (tag, left, right, children), buffer): - """Interpret a bitfield set as an integer, not sure this algo - is correct, will see I suppose""" - base = children[0] - base = self.mapSet[base[0]](base, buffer) - return base * 1j - \ No newline at end of file diff -Nru simpleparse-2.1.0a1/common/phonetics.py simpleparse-2.2.0/common/phonetics.py --- simpleparse-2.1.0a1/common/phonetics.py 2002-08-06 03:31:39.000000000 +0000 +++ simpleparse-2.2.0/common/phonetics.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,78 +0,0 @@ -"""Phonetic spellings for character values - -At the moment, only contains the "military alphabet" -(Alpha, Bravo ... Yankee, Zulu), which is used as -alternative timezone names by the military and apparently -some aviation groups. Note, these are fairly common spellings, -but they aren't necessarily going to match a particular -usage. I may have missed some of the possibilities... - - military_alphabet_char -- fully spelled out versions of - the Alpha, Bravo ... Yankee, Zulu phonetic alphabet, - including a few minor variations in spelling such as - Xray and X-ray. All characters use title-caps format, - so Zulu, not zulu will match. - military_alphabet_char_lower -- as for above, but with - lowercased versions of the above - -No interpreters are provided. Taking the first character of -the name will always give you the equivalent character uppercase -for the military_alphabet_char and lowercase for the -military_alphabet_char_lower. -""" -from simpleparse import objectgenerator, common -import string - -c = {} - -# note that Juliette comes before Juliet, because -# otherwise Juliette could never match in an FOGroup! 
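Note: a hedged sketch of how the number productions and interpreters above are typically combined; the grammar and the NumberProcessor class are assumptions made for illustration, not part of the package:

    from simpleparse.parser import Parser
    from simpleparse.common import numbers                     # registers int, float, hex, number, ...
    from simpleparse.common.numbers import IntInterpreter, FloatInterpreter
    from simpleparse.dispatchprocessor import DispatchProcessor

    class NumberProcessor(DispatchProcessor):
        # one callable per reported production name
        int = IntInterpreter()
        float = FloatInterpreter()

    parser = Parser("values := ((float/int), [ ]*)*", "values")
    success, values, next_char = parser.parse("12 3.5 -7", processor=NumberProcessor())
    # values should come back as the Python objects [12, 3.5, -7]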
-_letters = string.split( """Alpha -Bravo -Charlie -Delta -Echo Echo -Foxtrot -Golf Gulf -Hotel -India -Juliette Juliet -Kilo -Lima -Mike -November -Oscar -Papa -Quebec -Romeo -Sierra -Tango -Uniform -Victor -Whiskey -Xray X-ray -Yankee -Zulu""") - -set1,set2 = [], [] -for item in _letters: - set1.append( - objectgenerator.Literal( value=item) - ) - set2.append( - objectgenerator.Literal( value=string.lower(item)) - ) - -military_alphabet_char = objectgenerator.FirstOfGroup( - children = set1 -) -military_alphabet_char_lower = objectgenerator.FirstOfGroup( - children = set2 -) -del set1, set2 - -c[ "military_alphabet_char" ] = military_alphabet_char -c[ "military_alphabet_char_lower" ] = military_alphabet_char_lower - -common.share( c ) - diff -Nru simpleparse-2.1.0a1/common/strings.py simpleparse-2.2.0/common/strings.py --- simpleparse-2.1.0a1/common/strings.py 2002-08-07 00:11:20.000000000 +0000 +++ simpleparse-2.2.0/common/strings.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,161 +0,0 @@ -"""Python string parsers with escape characters - -Python-string-like operation as much as possible, this includes: - support for single and double-quoted strings - support for triple-quoted versions of the same - support for special character escapes as seen in 8-bit python strings - support for octal and hexidecimal character escapes - - - string_single_quote - string_double_quote - string_triple_single - string_triple_double - Individual string types with the above features - - string - Any of the above string types, in a simple FirstOf group - with the triple-quoted types first, then the single quoted - i.e. generated with this grammar: - - string_triple_double/string_triple_single/string_double_quote/string_single_quote - - -Interpreters: - StringInterpreter - Interprets any/all of the above as a normal (non-Raw) Python - regular (non-unicode) string. Hopefully the action is identical - to doing eval( matchedString, {},{}), without the negative security - implications of that approach. Note that you need to make the - interpreter available under each name you use directly in your - grammar, so if you use string_single_quote and string_double_quote - directly, then you need to add: - string_single_quote = myStringInterpreterInstance - string_double_quote = myStringInterpreterInstance - to your processor class. -""" - -from simpleparse.parser import Parser -from simpleparse import common, objectgenerator -from simpleparse.common import chartypes -from simpleparse.dispatchprocessor import * -import string - -c = {} - -stringDeclaration = r""" -# note that non-delimiter can never be hit by non-triple strings -str := delimiter, (char_no_quote/escaped_char/backslash_char/nondelimiter)*,delimiter - -escaped_char := '\\',( string_special_escapes / ('x',hex_escaped_char) / octal_escaped_char ) -octal_escaped_char := octdigit, octdigit?, octdigit? -hex_escaped_char := hexdigit,hexdigit - -backslash_char := "\\" # i.e. 
a backslash preceding a non-special char - -""" - -_stringTypeData = [ - ("string_double_quote", """ - := '"' -nondelimiter := -'"' -char_no_quote := -[\\\\"]+ -string_special_escapes := [\\\\abfnrtv"] -"""), - ("string_single_quote", """ - := "'" -nondelimiter := -"'" -char_no_quote := -[\\\\']+ -string_special_escapes := [\\\\abfnrtv'] -"""), - ("string_triple_single", """ -nondelimiter := -"'''" - := "'''" -char_no_quote := -[\\\\']+ -string_special_escapes := [\\\\abfnrtv'] -"""), - ("string_triple_double",''' -nondelimiter := -'"""' - := '"""' -char_no_quote := -[\\\\"]+ -string_special_escapes := [\\\\abfnrtv"] -'''), -] - -for name, partial in _stringTypeData: - _p = Parser( stringDeclaration + partial ) - c[ name ] = objectgenerator.LibraryElement( - generator = _p._generator, - production = "str", - ) -common.share( c ) -_p = Parser( """ -string := string_triple_double/string_triple_single/string_double_quote/string_single_quote -""" ) -c[ "string"] = objectgenerator.LibraryElement( - generator = _p._generator, - production = "string", -) - -class StringInterpreter(DispatchProcessor): - """Processor for converting parsed string values to their "intended" value - - Basically this processor handles de-escaping and stripping the - surrounding quotes, so that you get the string as a Python string - value. You use the processor by creating an instance of - StringInterpreter() as an item in another processor's - methodSource object (often the Parser itself). - - For example: - - class MyProcessor( DispatchProcessor ): - string = StringInterpreter() - - # following would be used if you have, for instance, - # used string_single_quote in an area where double - # or triple-quoted strings are not allowed, but have - # used string in another area. - string_single_quote = string - """ - def string( self, (tag, left, right, sublist), buffer): - """Dispatch any of the string types and return the result""" - return dispatch( self, sublist[0], buffer ) - - def string_single_quote( self, (tag, left, right, sublist), buffer): - return string.join(dispatchList(self, sublist, buffer), "") - string_double_quote = string_single_quote - string_triple_single = string_single_quote - string_triple_double = string_single_quote - - def char_no_quote( self, (tag, left, right, sublist), buffer): - return buffer[left:right] - nondelimiter = char_no_quote - - def escaped_char( self, (tag, left, right, sublist), buffer): - return string.join(dispatchList(self,sublist,buffer), "") - - def octal_escaped_char(self, (tag, left, right, sublist), buffer): - return chr(string.atoi( buffer[left:right], 8 )) - def hex_escaped_char( self, (tag, left, right, sublist), buffer): - return chr(string.atoi( buffer[left:right], 16 )) - - def backslash_char( self, (tag, left, right, sublist), buffer): - return "\\" - - def string_special_escapes( self, (tag, left, right, sublist), buffer): - """Maps "special" escapes to the corresponding characters""" - return self.specialescapedmap[ buffer[left:right]] - specialescapedmap = { - 'a':'\a', - 'b':'\b', - 'f':'\f', - 'n':'\n', - 'r':'\r', - 't':'\t', - 'v':'\v', - '\\':'\\', - '\n':'', - '"':'"', - "'":"'", - } - diff -Nru simpleparse-2.1.0a1/common/timezone_names.py simpleparse-2.2.0/common/timezone_names.py --- simpleparse-2.1.0a1/common/timezone_names.py 2002-08-06 03:31:39.000000000 +0000 +++ simpleparse-2.2.0/common/timezone_names.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,218 +0,0 @@ -"""Common timezone names (civilian, military and combined) - -These productions are a 
collection of common civilian and -military timezone names. The list of names is by no means -exhaustive (nor definitive), but it gives most timezones -at least one named value (to make it possible to enter the -name), and it doesn't repeat any names (I hope ;) ). You -have three major classes of names, civilian (EST, PST, GMT, -UTC), military single-character (A,B,C,D,E...) and military -phonetic spelling (Alpha, Bravo... Zulu). The military -variants are combined into a single production, however. - - civilian_timezone_name -- the "familiar" timezones, most - real-world data entry will want to use this as their - "timezone" definition I'm guessing. - - military_timezone_name -- military timezones in the two - formats outlined above. - - timezone_name -- combination of the two above into a - single production. - -Interpreter: - - TimeZoneNameInterpreter -- see below for details, by - default takes the timezone name and converts to - a second offset in West-negative format. Note: - this is the _opposite_ of the time module, but is - the more commonly used format AFAIK. Null matches - will return a default TimeZone as specified. -""" -from simpleparse import objectgenerator, common -from simpleparse.common import phonetics -import time - -c = {} - -timezone_data = [] -civilian_data = [ - # Basically this defines our recognised input locales, - # it is by no means exhaustive, but it gives fairly - # good coverage with minimal overlap - ('NZDT',46800), - ('IDLE',43200), - ('NZST',43200), - ('NZT',43200), - ('AESST',39600), - ('ACSST',37800), - ('CADT',37800), - ('SADT',37800), - ('AEST',36000), - ('EAST',36000), - ('GST',36000), - ('LIGT',36000), - ('ACST',34200), - ('CAST',34200), - ('SAT',34200), - ('AWSST',32400), - ('JST',32400), - ('KST',32400), - ('WDT',32400), - ('MT',30600), - ('AWST',28800), - ('CCT',28800), - ('WADT',28800), - ('WST',28800), - ('JT',27000), - ('WAST',25200), - ('IT',12600), - ('BT',10800), - ('EETDST',10800), - ('MSK', 10800), - ('CETDST',7200), - ('EET',7200), - ('FWT',7200), - ('IST',7200), - ('MEST',7200), - ('METDST',7200), - ('SST',7200), - ('BST',3600), - ('CET',3600), - ('DNT',3600), - ('DST',3600), - ('FST',3600), - ('MET',3600), - ('MEWT',3600), - ('MEZ',3600), - ('NOR',3600), - ('SET',3600), - ('SWT',3600), - ('WETDST',3600), - ('GMT',0), - ('UTC', 0), - ('WET',0), - ('WAT',-3600), - ('NDT',-5400), - ('AT', -7200), - ('ADT',-10800), - ('NFT',-9000), - ('NST',-9000), - ('AST',-14400), - ('EDT',-14400), - ('ZP4',-14400), - ('CDT',-18000), - ('EST',-18000), - ('ZP5',-18000), - ('CST',-21600), - ('MDT',-21600), - ('ZP6',-21600), - ('MST',-25200), - ('PDT',-25200), - ('PST',-28800), - ('YDT',-28800), - ('HDT',-32400), - ('YST',-32400), - ('AKST',-32400), - - ('AHST',-36000), - ('HST',-36000), - ('CAT',-36000), - ('NT',-39600), - ('IDLW',-43200), -] -timezone_data = timezone_data + civilian_data -### add military timezones -##A-I then K-Z are used... -## z = 0 -## a - i, k-m -> + values up to 12 -## n-y - values up to -12 -## what a totally messed up system! -## I've checked with a number of sites, they all seem to think -## it works this way... darned if I can figure out why they don't -## make N -12, o -11 etceteras so that z would come in order and you'd -## have a simple progression around the globe... sigh. 
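Note: a hedged usage sketch for the timezone productions and the TimeZoneNameInterpreter defined further down in this module; the grammar and processor class are illustrative only:

    from simpleparse.parser import Parser
    from simpleparse.common import timezone_names
    from simpleparse.common.timezone_names import TimeZoneNameInterpreter
    from simpleparse.dispatchprocessor import DispatchProcessor

    class TZProcessor(DispatchProcessor):
        # report offsets in hours (west-negative) rather than seconds
        timezone_name = TimeZoneNameInterpreter(seconds=3600.0)

    parser = Parser("tz := timezone_name", "tz")
    success, offsets, next_char = parser.parse("EST", processor=TZProcessor())
    # offsets would be [-5.0], since EST is 18000 seconds west of UTC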
-zulu_data = [ - ('A', 3600), ('B', 7200), ('C', 10800), ('D', 14400), ('E', 18000), - ('F', 21600), ('G', 25200), ('H', 28800), ('I', 32400), ('K', 36000), - ('L', 39600), ('M', 43200), - ('N', -3600), ('O', -7200), ('P', -10800), ('Q', -14400), ('R', -18000), - ('S', -21600), ('T', -25200), ('U', -28800), ('V', -32400), ('W', -36000), - ('X', -39600), ('Y', -43200), - ('Z', 0), -] -# now add these, plus the expanded versions to the dict above... -# note that we only allow capitalised versions of the military -# zones! -tztemp = [] -for key, value in zulu_data: - for item in phonetics._letters: - if item[0] == key: - tztemp.append( (item, value) ) -# order is important here, want longer first -zulu_data = tztemp + zulu_data -del tztemp -# and call that done for now, folks... -timezone_data = timezone_data + zulu_data -# the rules are really big, but oh well... -def _build( data ): - """Build the name:time map and match rule for each dataset""" - data = data[:] - data.sort() # get shortest and least values first forcefully... - # then reverse that, to get longest first... - data.reverse() - names = [] - mapping = {} - for key,value in data: - names.append( objectgenerator.Literal(value=key)) - mapping[key] = value - rule = objectgenerator.FirstOfGroup( - children = names - ) - return mapping, rule -zulu_mapping, zulu_rule = _build( zulu_data ) -civilian_mapping, civilian_rule = _build( civilian_data ) -timezone_mapping, timezone_rule = _build( timezone_data ) - -c[ "military_timezone_name" ] = zulu_rule -c[ "civilian_timezone_name" ] = civilian_rule -c[ "timezone_name" ] = timezone_rule - -common.share(c) - -import time -if time.daylight: - LOCAL_ZONE = time.altzone -else: - LOCAL_ZONE = time.timezone -# account for time module's different counting procedure... -LOCAL_ZONE = -LOCAL_ZONE - -class TimeZoneNameInterpreter: - """Intepret a timezone specified as a military or civilian timezone name - - Return value is an offset from UTC given in seconds. - If a null-match is passed uses the passed defaultZone. - Returns values in seconds difference from UTC (negative - West) divided by the passed "seconds" argument. - """ - def __init__( self, defaultZone=LOCAL_ZONE, seconds=1.0): - """ - defaultZone -- ofset in seconds to be returned if there - is no value specified (null-match) - seconds -- divisor applied to the value before returning, - if you want hours, use 3600.0, if you want minutes, - use 60.0, if you want days (why?), use 86400.0 - """ - self.defaultZone = defaultZone - self.seconds = seconds - def __call__( self, (tag, left, right, children), buffer ): - value = buffer[ left: right ] - if value: - try: - return timezone_mapping[ value ]/self.seconds - except KeyError: - raise ValueError( "Unrecognised (but parsed!) TimeZone Name %s found at character position %s"%(value, left)) - else: - return self.defaultZone/self.seconds - diff -Nru simpleparse-2.1.0a1/debian/changelog simpleparse-2.2.0/debian/changelog --- simpleparse-2.1.0a1/debian/changelog 2015-08-18 12:46:11.000000000 +0000 +++ simpleparse-2.2.0/debian/changelog 2016-12-25 09:39:11.000000000 +0000 @@ -1,3 +1,18 @@ +simpleparse (2.2.0-1) unstable; urgency=medium + + [ Ondřej Nový ] + * Fixed VCS URL (https) + + [ Vincent Bernat ] + * New upstream release. + - drop "with"-keyword patch + - fix FTBFS (Closes: #824738) + * Switch to pybuild. + * Bump Standards-Version. + * Remove SF logo from documentation. 
+ + -- Vincent Bernat Sun, 25 Dec 2016 10:39:11 +0100 + simpleparse (2.1.0a1-7) unstable; urgency=low [ Jakub Wilk ] diff -Nru simpleparse-2.1.0a1/debian/compat simpleparse-2.2.0/debian/compat --- simpleparse-2.1.0a1/debian/compat 2014-06-07 16:32:53.000000000 +0000 +++ simpleparse-2.2.0/debian/compat 2016-12-25 09:39:11.000000000 +0000 @@ -1 +1 @@ -7 +9 diff -Nru simpleparse-2.1.0a1/debian/control simpleparse-2.2.0/debian/control --- simpleparse-2.1.0a1/debian/control 2015-08-18 12:23:39.000000000 +0000 +++ simpleparse-2.2.0/debian/control 2016-12-25 09:39:11.000000000 +0000 @@ -3,21 +3,20 @@ Priority: optional Maintainer: Debian Python Modules Team Uploaders: Vincent Bernat -Build-Depends: debhelper (>= 7), - cdbs (>= 0.4.90~), +Build-Depends: debhelper (>= 7.0.50~), dh-python, python-all-dev (>= 2.6.6-3~), python-setuptools (>= 0.6b3), - python-egenix-mxdatetime -Vcs-Svn: svn://anonscm.debian.org/python-modules/packages/simpleparse/trunk/ -Vcs-Browser: http://anonscm.debian.org/viewvc/python-modules/packages/simpleparse/trunk/ -Standards-Version: 3.9.6 + python-egenix-mxdatetime, + python-nose +Vcs-Git: https://anonscm.debian.org/git/python-modules/packages/simpleparse.git +Vcs-Browser: https://anonscm.debian.org/cgit/python-modules/packages/simpleparse.git +Standards-Version: 3.9.8 Homepage: http://simpleparse.sourceforge.net/ Package: python-simpleparse Architecture: all -Depends: ${python:Depends}, ${shlibs:Depends}, ${misc:Depends} -Python-Depends: python-simpleparse-mxtexttools (>= ${binary:Version}) +Depends: ${python:Depends}, ${shlibs:Depends}, ${misc:Depends}, python-simpleparse-mxtexttools (>= ${binary:Version}) Suggests: python-simpleparse-doc Description: simple parser generator for Python SimpleParse is a BSD-licensed Python package providing a simple parser diff -Nru simpleparse-2.1.0a1/debian/.git-dpm simpleparse-2.2.0/debian/.git-dpm --- simpleparse-2.1.0a1/debian/.git-dpm 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/debian/.git-dpm 2016-12-25 09:39:11.000000000 +0000 @@ -0,0 +1,11 @@ +# see git-dpm(1) from git-dpm package +e9bf55f2d14d538775432c276b565d43b14b671c +e9bf55f2d14d538775432c276b565d43b14b671c +7d50954124663a123bd913ced1884e97fb87a8a3 +7d50954124663a123bd913ced1884e97fb87a8a3 +simpleparse_2.2.0.orig.tar.gz +784e550f2a644c16dbb1c4e995d360542dfb7610 +218082 +debianTag="debian/%e%v" +patchedTag="patched/%e%v" +upstreamTag="upstream/%e%u" diff -Nru simpleparse-2.1.0a1/debian/patches/0001-doc-remove-SourceForge-logo.patch simpleparse-2.2.0/debian/patches/0001-doc-remove-SourceForge-logo.patch --- simpleparse-2.1.0a1/debian/patches/0001-doc-remove-SourceForge-logo.patch 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/debian/patches/0001-doc-remove-SourceForge-logo.patch 2016-12-25 09:39:11.000000000 +0000 @@ -0,0 +1,88 @@ +From e9bf55f2d14d538775432c276b565d43b14b671c Mon Sep 17 00:00:00 2001 +From: Vincent Bernat +Date: Sun, 25 Dec 2016 11:01:51 +0100 +Subject: doc: remove SourceForge logo + +--- + doc/common_problems.html | 6 ------ + doc/index.html | 3 --- + doc/processing_result_trees.html | 6 ------ + doc/scanning_with_simpleparse.html | 5 ----- + doc/simpleparse_grammars.html | 5 +---- + 5 files changed, 1 insertion(+), 24 deletions(-) + +diff --git a/doc/common_problems.html b/doc/common_problems.html +index 97176bc3f2b0..e67885b959da 100644 +--- a/doc/common_problems.html ++++ b/doc/common_problems.html +@@ -133,12 +133,6 @@ tag-lists + + Up to index...
+ +-

+-

+
+ + +diff --git a/doc/index.html b/doc/index.html +index 617253bde90e..205e3b9ad0da 100644 +--- a/doc/index.html ++++ b/doc/index.html +@@ -339,7 +339,4 @@ OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, + WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + PERFORMANCE OF THIS SOFTWARE!

+-

+-

+ +diff --git a/doc/processing_result_trees.html b/doc/processing_result_trees.html +index 9666febd7f8e..96c05054d49f 100644 +--- a/doc/processing_result_trees.html ++++ b/doc/processing_result_trees.html +@@ -227,12 +227,6 @@ results tree has a non-standard format that you need to explicitly watch + out for while processing the results.

+ Up to index...
+ +-

A SourceForge Logo +-
+- Open Source project
+-

+
+
+
+diff --git a/doc/scanning_with_simpleparse.html b/doc/scanning_with_simpleparse.html +index 014b943fc179..9a3fa3771cca 100644 +--- a/doc/scanning_with_simpleparse.html ++++ b/doc/scanning_with_simpleparse.html +@@ -142,10 +142,5 @@ results + tree in “Processing Result + Trees”.

+ Up to index...
+-

+-

+ + +diff --git a/doc/simpleparse_grammars.html b/doc/simpleparse_grammars.html +index 72c04add4b23..4b47b3b25b90 100644 +--- a/doc/simpleparse_grammars.html ++++ b/doc/simpleparse_grammars.html +@@ -502,7 +502,4 @@ the grammar used during parser generation is a manually generated + version found in the simpleparse.simpleparsegrammar module.

+
declaration = r"""declarationset      :=  declaration+
declaration := ts, (unreportedname/expandedname/name) ,ts,':',':'?,'=',seq_group

element_token := lookahead_indicator?, ts, negpos_indicator?,ts, (literal/range/group/name),ts, occurence_indicator?, ts, error_on_fail?

negpos_indicator := [-+]
lookahead_indicator := "?"
occurence_indicator := [+*?]
error_on_fail := "!", (ts,literal)?

>group< := '(',seq_group, ')'
seq_group := ts,(error_on_fail/fo_group/element_token),
(ts, seq_indicator, ts,
(error_on_fail/fo_group/element_token)
)*, ts

fo_group := element_token, (ts, fo_indicator, ts, element_token)+


# following two are likely something people might want to
# replace in many instances...
<fo_indicator> := "/"
<seq_indicator> := ','

unreportedname := '<', name, '>'
expandedname := '>', name, '<'
name := [a-zA-Z_],[a-zA-Z0-9_]*
<ts> := ( [ \011-\015]+ / comment )*
comment := '#',-'\n'*,'\n'
literal := literalDecorator?,("'",(CHARNOSNGLQUOTE/ESCAPEDCHAR)*,"'") / ('"',(CHARNODBLQUOTE/ESCAPEDCHAR)*,'"')
literalDecorator := [c]



range := '[',CHARBRACE?,CHARDASH?, (CHARRANGE/CHARNOBRACE)*, CHARDASH?,']'
CHARBRACE := ']'
CHARDASH := '-'
CHARRANGE := CHARNOBRACE, '-', CHARNOBRACE
CHARNOBRACE := ESCAPEDCHAR/CHAR
CHAR := -[]]
ESCAPEDCHAR := '\\',( SPECIALESCAPEDCHAR / ('x',HEXESCAPEDCHAR) / ("u",UNICODEESCAPEDCHAR_16) /("U",UNICODEESCAPEDCHAR_32)/OCTALESCAPEDCHAR )
SPECIALESCAPEDCHAR := [\\abfnrtv"']
OCTALESCAPEDCHAR := [0-7],[0-7]?,[0-7]?
HEXESCAPEDCHAR := [0-9a-fA-F],[0-9a-fA-F]
CHARNODBLQUOTE := -[\\"]+
CHARNOSNGLQUOTE := -[\\']+
UNICODEESCAPEDCHAR_16 := [0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F]
UNICODEESCAPEDCHAR_32 := [0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F]
"""
+ Up to index...
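Note: to make the grammar constructs listed above concrete, a small hedged example (not part of the documentation) mixing reported productions, unreported <...> productions and character ranges:

    from simpleparse.parser import Parser

    grammar = r'''
    csv       := row+
    row       := value, (comma, value)*, newline
    value     := [a-zA-Z0-9 ]*
    <comma>   := ','
    <newline> := '\n'
    '''
    parser = Parser(grammar, "csv")
    success, rows, next_char = parser.parse("a,b,c\n1,2,3\n")
    # only "row" and "value" tags appear in the result tree;
    # comma and newline match but are never reported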
+-

+-

+- +\ No newline at end of file ++ diff -Nru simpleparse-2.1.0a1/debian/patches/remove-with-keyword.patch simpleparse-2.2.0/debian/patches/remove-with-keyword.patch --- simpleparse-2.1.0a1/debian/patches/remove-with-keyword.patch 2015-08-18 12:45:55.000000000 +0000 +++ simpleparse-2.2.0/debian/patches/remove-with-keyword.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,131 +0,0 @@ -Remove "with" keyword to remove a warning with python2.5 - ---- SimpleParse-2.1.0a1/stt/TextTools/TextTools.py~ 2006-02-19 00:33:56.000000000 +0100 -+++ SimpleParse-2.1.0a1/stt/TextTools/TextTools.py 2008-01-26 17:07:18.000000000 +0100 -@@ -167,7 +167,7 @@ - # Extra stuff useful in combination with the C functions - # - --def replace(text,what,with,start=0,stop=None, -+def replace(text,what,withwhat,start=0,stop=None, - - SearchObject=TextSearch,join=join,joinlist=joinlist,tag=tag, - string_replace=string.replace,type=type, -@@ -188,11 +188,11 @@ - what = so.match - if stop is None: - if start == 0 and len(what) < 2: -- return string_replace(text,what,with) -+ return string_replace(text,what,withwhat) - stop = len(text) - t = ((text,sWordStart,so,+2), - # Found something, replace and continue searching -- (with,Skip+AppendTagobj,len(what),-1,-1), -+ (withwhat,Skip+AppendTagobj,len(what),-1,-1), - # Rest of text - (text,Move,ToEOF) - ) -@@ -203,13 +203,13 @@ - - # Alternative (usually slower) versions using different techniques: - --def _replace2(text,what,with,start=0,stop=None, -+def _replace2(text,what,withwhat,start=0,stop=None, - - join=join,joinlist=joinlist,tag=tag, - TextSearchType=TextSearchType,TextSearch=TextSearch): - - """Analogon to string.replace; returns a string with all occurences -- of what in text[start:stop] replaced by with. -+ of what in text[start:stop] replaced by withwhat. - - This version uses a one entry tag-table and a - Boyer-Moore-Search-object. what can be a string or a -@@ -226,13 +226,13 @@ - stop = len(text) - if type(what) is not TextSearchType: - what=TextSearch(what) -- t = ((with,sFindWord,what,+1,+0),) -+ t = ((withwhat,sFindWord,what,+1,+0),) - found,taglist,last = tag(text,t,start,stop) - if not found: - return text - return join(joinlist(text,taglist)) - --def _replace3(text,what,with, -+def _replace3(text,what,withwhat, - - join=string.join,TextSearch=TextSearch, - TextSearchType=TextSearchType): -@@ -245,12 +245,12 @@ - l = [] - x = 0 - for left,right in slices: -- l.append(text[x:left] + with) -+ l.append(text[x:left] + withwhat) - x = right - l.append(text[x:]) - return join(l,'') - --def _replace4(text,what,with, -+def _replace4(text,what,withwhat, - - join=join,joinlist=joinlist,tag=tag,TextSearch=TextSearch, - TextSearchType=TextSearchType): -@@ -262,7 +262,7 @@ - return text - repl = [None]*len(slices) - for i in range(len(slices)): -- repl[i] = (with,)+slices[i] -+ repl[i] = (withwhat,)+slices[i] - return join(joinlist(text,repl)) - - def multireplace(text,replacements,start=0,stop=None, -@@ -569,16 +569,16 @@ - print 'Replacing strings' - print '-'*72 - print -- for what,with in (('m','M'),('mx','MX'),('mxText','MXTEXT'), -+ for what,withwhat in (('m','M'),('mx','MX'),('mxText','MXTEXT'), - ('hmm','HMM'),('hmmm','HMM'),('hmhmm','HMM')): -- print 'Replace "%s" with "%s"' % (what,with) -+ print 'Replace "%s" with "%s"' % (what,withwhat) - t.start() - for i in range(100): -- rtext = string.replace(text,what,with) -+ rtext = string.replace(text,what,withwhat) - print 'with string.replace:',t.stop(),'sec.' 
- t.start() - for i in range(100): -- ttext = replace(text,what,with) -+ ttext = replace(text,what,withwhat) - print 'with tag.replace:',t.stop(),'sec.' - if ttext != rtext: - print 'results are NOT ok !' -@@ -586,7 +586,7 @@ - mismatch(rtext,ttext) - t.start() - for i in range(100): -- ttext = _replace2(text,what,with) -+ ttext = _replace2(text,what,withwhat) - print 'with tag._replace2:',t.stop(),'sec.' - if ttext != rtext: - print 'results are NOT ok !' -@@ -594,7 +594,7 @@ - print rtext - t.start() - for i in range(100): -- ttext = _replace3(text,what,with) -+ ttext = _replace3(text,what,withwhat) - print 'with tag._replace3:',t.stop(),'sec.' - if ttext != rtext: - print 'results are NOT ok !' -@@ -602,7 +602,7 @@ - print rtext - t.start() - for i in range(100): -- ttext = _replace4(text,what,with) -+ ttext = _replace4(text,what,withwhat) - print 'with tag._replace4:',t.stop(),'sec.' - if ttext != rtext: - print 'results are NOT ok !' diff -Nru simpleparse-2.1.0a1/debian/patches/series simpleparse-2.2.0/debian/patches/series --- simpleparse-2.1.0a1/debian/patches/series 2015-08-18 12:45:55.000000000 +0000 +++ simpleparse-2.2.0/debian/patches/series 2016-12-25 09:39:11.000000000 +0000 @@ -1 +1 @@ -remove-with-keyword.patch +0001-doc-remove-SourceForge-logo.patch diff -Nru simpleparse-2.1.0a1/debian/python-simpleparse.install simpleparse-2.2.0/debian/python-simpleparse.install --- simpleparse-2.1.0a1/debian/python-simpleparse.install 2014-06-07 16:32:56.000000000 +0000 +++ simpleparse-2.2.0/debian/python-simpleparse.install 2016-12-25 09:39:11.000000000 +0000 @@ -1,7 +1,6 @@ -usr/lib/python*/site-packages/simpleparse/*.py -usr/lib/python*/site-packages/simpleparse/common/*.py -usr/lib/python*/site-packages/simpleparse/tests/*.py -usr/lib/python*/site-packages/simpleparse/stt/*.py -usr/lib/python*/site-packages/simpleparse/stt/TextTools/*.py -usr/lib/python*/site-packages/simpleparse/stt/TextTools/Constants/*.py -usr/lib/python*/site-packages/simpleparse/xml/*.py +usr/lib/python*/dist-packages/simpleparse/*.py +usr/lib/python*/dist-packages/simpleparse/common/*.py +usr/lib/python*/dist-packages/simpleparse/stt/*.py +usr/lib/python*/dist-packages/simpleparse/stt/TextTools/*.py +usr/lib/python*/dist-packages/simpleparse/stt/TextTools/Constants/*.py +usr/lib/python*/dist-packages/simpleparse/xmlparser/*.py diff -Nru simpleparse-2.1.0a1/debian/python-simpleparse-mxtexttools.install simpleparse-2.2.0/debian/python-simpleparse-mxtexttools.install --- simpleparse-2.1.0a1/debian/python-simpleparse-mxtexttools.install 2014-06-07 16:32:56.000000000 +0000 +++ simpleparse-2.2.0/debian/python-simpleparse-mxtexttools.install 2016-12-25 09:39:11.000000000 +0000 @@ -1 +1 @@ -usr/lib/python*/site-packages/simpleparse/stt/TextTools/mxTextTools/* +usr/lib/python*/dist-packages/simpleparse/stt/TextTools/mxTextTools/* diff -Nru simpleparse-2.1.0a1/debian/rules simpleparse-2.2.0/debian/rules --- simpleparse-2.1.0a1/debian/rules 2015-08-18 12:20:19.000000000 +0000 +++ simpleparse-2.2.0/debian/rules 2016-12-25 09:39:11.000000000 +0000 @@ -1,20 +1,4 @@ #!/usr/bin/make -f -include /usr/share/cdbs/1/rules/debhelper.mk -include /usr/share/cdbs/1/class/python-distutils.mk - -binary-predeb/python-simpleparse-doc:: - chmod 0644 $(CURDIR)/debian/python-simpleparse-doc/usr/share/doc/python-simpleparse-doc/examples/* - -install/python-simpleparse:: -ifeq (,$(findstring nocheck,$(DEB_BUILD_OPTIONS))) - ln -s ../test_printers_garbage.py tests/test_printers_garbage.py - for PY in $(shell pyversions -r); do \ - $$PY 
debian/runtests.py debian/tmp || exit 1 ; \ - done - rm -f tests/test_printers_garbage.py -endif - -clean:: - rm -f tests/test_printers_garbage.py - rm -rf build +%: + dh $@ --with python2 --buildsystem=pybuild diff -Nru simpleparse-2.1.0a1/dispatchprocessor.py simpleparse-2.2.0/dispatchprocessor.py --- simpleparse-2.1.0a1/dispatchprocessor.py 2006-02-18 23:12:03.000000000 +0000 +++ simpleparse-2.2.0/dispatchprocessor.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,122 +0,0 @@ -"""Dispatch-processor API - -This is a post-processing processor API based on dispatching -each element of a result tree in a top-down recursive call -structure. It is the API used by the SimpleParseGrammar Parser, -and likely will be the default processor for SimpleParse. -""" -from simpleparse.processor import Processor - -class DispatchProcessor(Processor): - """Dispatch results-tree in a top-down recursive pattern with - attribute lookup to determine production -> method correspondence. - - To use the class, subclass it, then define methods for - processing each production. The methods should take this form: - def production_name( self, (tag, left, right, children), buffer): - pass - Where children may be either a list, or None, and buffer is the - entire buffer being parsed. - """ - def __call__( self, value, buffer ): - """Process the results of the parsing run over buffer - - Value can either be: (success, tags, next) for a top-level - production, or (tag, left, right, children) for a non-top - production. - """ - if len( value ) == 3: - # is a top-level production - success, tags, next = value - if success: - result = dispatchList( self, tags, buffer ) - return success, result, next - else: - return success, tags, next - else: - # is a 4-item result tuple/tree - return dispatch( self, value, buffer ) - - -def dispatch( source, tag, buffer ): - """Dispatch on source for tag with buffer - - Find the attribute or key tag[0] of source, - then call it with (tag, buffer) - """ - try: - function = getattr (source, tag[0]) - except AttributeError: - try: - function = source[tag[0]] - except: - raise AttributeError( '''No processing function for tag "%s" in object %s! Check the parser definition!'''%(tag[0], repr(source))) - return function( tag, buffer ) - -def dispatchList( source, taglist, buffer ): - """Dispatch on source for each tag in taglist with buffer""" - if taglist: - return map( dispatch, [source]*len(taglist), taglist, [buffer]*len(taglist)) - else: - return [] - -try: - {}.setdefault -except AttributeError: - def multiMap( taglist ): - """Convert the taglist to a mapping from tag-object:[list-of-tags] - (slower non-setdefault version for older Python versions)""" - set = {} - if not taglist: - return set - for tag in taglist: - key = tag[0] - if source and buffer: - tag = dispatch( source, tag, buffer ) - set[key] = set.get(key, []) + [tag] - return set -else: - def multiMap( taglist, source=None, buffer=None ): - """Convert a taglist to a mapping from tag-object:[list-of-tags] - - For instance, if you have items of 3 different types, in any order, - you can retrieve them all sorted by type with multimap( childlist) - then access them by tagobject key. 
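Note: a hedged sketch of the subclassing pattern the DispatchProcessor docstring above describes; the grammar and production names are invented for illustration:

    from simpleparse.parser import Parser
    from simpleparse.dispatchprocessor import DispatchProcessor, dispatchList

    class KeyValueProcessor(DispatchProcessor):
        # one method per reported production; each receives (tag, start, stop, children) and the buffer
        def pair(self, info, buffer):
            (tag, start, stop, children) = info
            return tuple(dispatchList(self, children, buffer))
        def key(self, info, buffer):
            (tag, start, stop, children) = info
            return buffer[start:stop]
        value = key

    decl = r'''
    pairs := (pair, '\n'?)*
    pair  := key, '=', value
    key   := [a-zA-Z_]+
    value := [a-zA-Z0-9_]+
    '''
    parser = Parser(decl, "pairs")
    success, result, next_char = parser.parse("a=1\nb=two\n", processor=KeyValueProcessor())
    # result would be [('a', '1'), ('b', 'two')]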
- """ - set = {} - if not taglist: - return set - for tag in taglist: - key = tag[0] - if source and buffer: - tag = dispatch( source, tag, buffer ) - set.setdefault(key,[]).append( tag ) - return set -def singleMap( taglist, source=None, buffer=None ): - """Convert a taglist to a mapping from tag-object:tag, overwritting early with late tags""" - set = {} - if not taglist: - return set - for tag in taglist: - key = tag[0] - if source and buffer: - tag = dispatch( source, tag, buffer ) - set[key] = tag - return set - -def getString( (tag, left, right, sublist), buffer): - """Return the string value of the tag passed""" - return buffer[ left:right ] - -try: - from simpleparse.stt.TextTools import countlines -except ImportError: - def lines( start=None, end=None, buffer=None ): - """Return line number in file at character index (string.count version)""" - return string.count(buffer, '\n', start or 0, end or len(buffer) ) -else: - def lines( start=None, end=None, buffer=None ): - """Return line number in file at character index (mx.TextTools version)""" - return countlines (buffer[start or 0:end or len(buffer)]) - - diff -Nru simpleparse-2.1.0a1/doc/index.html simpleparse-2.2.0/doc/index.html --- simpleparse-2.1.0a1/doc/index.html 2006-02-19 01:58:24.000000000 +0000 +++ simpleparse-2.2.0/doc/index.html 2015-11-11 18:42:23.000000000 +0000 @@ -1,25 +1,20 @@ - - + + - - - - SimpleParse 2.0 + + SimpleParse 3.0 + - - - +

SimpleParse A Parser Generator for mxTextTools -v2.1.0

+v3.0.0

SimpleParse is a BSD-licensed Python package providing a simple and fast parser generator using a modified version of the mxTextTools text-tagging engine. SimpleParse allows you to generate parsers directly from your -EBNF grammar.
-

+EBNF grammar.

Unlike most parser generators, SimpleParse generates single-pass parsers (there is no distinct tokenization stage), an approach taken from the predecessor project (mcf.pars) which @@ -53,8 +48,7 @@

  • Common Problems -- description of a number of common bugs, errors, pitfalls and anti-patterns when using the engine.
  • -
  • IBM +
  • IBM DeveloperWorks Article by Dr. David Mertz -- discusses (and teaches the use of) SimpleParse 1.0, contrasting the EBNF-based parser with tools such as regexen for text-processing tasks.  Watch also @@ -73,26 +67,25 @@
  • Acquisition and Installation

    -

    You will need a copy of Python with distutils -support (Python versions 2.0 and above include this). You'll also need -a C -compiler compatible with your Python build and understood by distutils.

    -

    To install the base SimpleParse engine, download -the latest version in your preferred format. If you are using the -Win32 installer, simply run the executable. If you are using one of the -source distributions, unpack the distribution into a -temporary directory (maintaining the directory structure) -then run:

    -
    setup.py install
    -

    in the top directory created by the expansion process.  This -will cause the patched mxTextTools library to be built as a sub-package -of the simpleparse package and will then install the whole package to -your system.
    -

    +

    You will need a copy of Python 2.7, 3.3 or above. If you are compiling +the package you'll also need a C compiler compatible with your Python.

    +

    To install the base SimpleParse engine:

    +
    $ pip install SimpleParse
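
A quick post-install sanity check (a sketch only: the two-production grammar below is hypothetical; Parser and its parse method are the package's documented entry points):

    from simpleparse.parser import Parser

    declaration = r'''
    numbers := num, (',', num)*
    num     := [0-9]+
    '''
    parser = Parser(declaration, 'numbers')
    success, children, next_char = parser.parse('1,23,456')
    # success is true and children is the raw result tree: one
    # ('num', start, stop, children) tuple per number matched.
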

    Features/Changelog

    -

    New in 2.1.0a1:

    +

    New in 3.0.0:

    +
      +
    • Experimental Python 3.3+ support (thanks to Anthony Tuininga). Python 2.7 is still the recommended version.
    • +
    • Under Python 3.x the default is to generate unicode parsers; these are *much* slower than byte parsers
    • +
    • Test suite is now nosetests based
    • +
    • Small code cleanups throughout
    • +
    +

    New in 2.1.1:

    +
      +
    • Fixes for stubbed-in Unicode character ranges, unicode support is still very much experimental
      +
    • +
    +

    New in 2.1.1a2:

    +
    • Disable all of the mxDebugPrintf functionality, which should allow us to build on Win32 with Mingw32 for Python 2.6

    New in 2.1.1a1:

    • Fixes to build under Python 2.6
    • Rename of simpleparse.xml to simpleparse.xmlparser to avoid conflicts with standard library "xml"
    • Eliminate use of .message on exceptions, as this has been deprecated in Python 2.6

    New in 2.1.0a1:

    • Includes (patched) mxTextTools extension as part of SimpleParse, no longer uses stand-alone mxTextTools installations
      @@ -100,7 +93,9 @@
    • Retooled setup environment to build and distribute directly from the CVS checkout
    • Bug-fixes in c_comment and c_nest_comment common productions -(thanks to Stephen Waterbury), basic tests for the comment productions
    • +(thanks to Stephen Waterbury), basic tests for the comment productions +added
      +

    New in 2.0.1:

    @@ -137,8 +132,7 @@ children are reported as if the enclosing production did not exist (allows you to use productions for organisational as well as reporting purposes) -
  • Exposure of callout +
  • Exposure of callout mechanism in mxTextTools
  • Exposure of "LookAhead" mechanism in mxTextTools (allows you to spell "is followed by", "is not followed by", or "matches x but @@ -155,8 +149,7 @@ group to specify that all subsequent items must succeed.  You can specify an error message format by using a string literal after the ! character.
  • -
  • Library of common constructs (simpleparse.common package) +
  • Library of common constructs (simpleparse.common package) which are easily included in your grammars
  • Hexadecimal escapes for string and character ranges
  • @@ -225,8 +218,7 @@
  • The library of common patterns is extremely sparse
  • Unicode support
  • There is no analysis and only minimal reduction done on the -grammar.  Having now read most of Parsing Techniques - A +grammar.  Having now read most of Parsing Techniques - A Practical Guide, I can see how some fairly significant changes will be required to support such operations (and thereby the more common parsing techniques).
    @@ -261,7 +253,7 @@ this may seem silly, but it would be nice to implement a more advanced parsing algorithm directly in C, without going through the assembly-like -interface of mxTextTools.  Given that Marc-André isn't +interface of mxTextTools.  Given that Marc-André isn't interested in adopting the non-recursive codebase, there's not much point retaining compatability with mxTextTools, so moving to a more @@ -289,23 +281,18 @@ argue for using the non-recursive rewrite.

    To build the non-recursive TextTools engine, you'll need to get the source distribution for the non-recursive implementation from -the SimpleParse +the SimpleParse file repository.  Note, there are incompatabilities in the mxBase 2.1 versions that make it necessary to use the versions specified below to build the non-recursive versions.

    This archive is intended to be expanded over the mxBase source archive from the top-level directory, replacing one file @@ -330,9 +317,9 @@

    Extensions to the eGenix extensions (most significantly the rewrite of the core loop) are copyright Mike Fletcher and released under the SimpleParse License below:

    -

        Copyright © 2003-2006, Mike Fletcher

    +

        Copyright © 2003-2006, Mike Fletcher

    SimpleParse License:

    -

    Copyright © 1998-2006, Copyright by +

    Copyright © 1998-2006, Copyright by Mike C. Fletcher; All Rights Reserved.
    mailto: mcfletch@users.sourceforge.net

    @@ -352,11 +339,7 @@ WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE!

    -

    A SourceForge Logo
    +

    A SourceForge Logo
    Open Source project

    - - + diff -Nru simpleparse-2.1.0a1/doc/pydoc/__builtin__.html simpleparse-2.2.0/doc/pydoc/__builtin__.html --- simpleparse-2.1.0a1/doc/pydoc/__builtin__.html 2006-02-19 01:05:12.000000000 +0000 +++ simpleparse-2.2.0/doc/pydoc/__builtin__.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,2438 +0,0 @@ - - -Python: built-in module __builtin__ - - - - -
     
    - 
    __builtin__
    index
    (built-in)
    -

    Built-in functions, exceptions, and other objects.

    -Noteworthy: None is the `nil' object; Ellipsis represents `...' in slices.

    -

    - - - - - -
     
    -Classes
           
    -
    object -
    -
    -
    basestring -
    -
    -
    str -
    unicode -
    -
    -
    buffer -
    classmethod -
    complex -
    dict -
    enumerate -
    file -
    file -
    float -
    frozenset -
    int -
    -
    -
    bool -
    -
    -
    list -
    long -
    property -
    reversed -
    set -
    slice -
    staticmethod -
    super -
    tuple -
    type -
    xrange -
    -
    -
    -

    - - - - - - - -
     
    -class basestring(object)
       Type basestring cannot be instantiated; it is the base for str and unicode.
     
     Data and other attributes defined here:
    -
    __new__ = <built-in method __new__ of type object>
    T.__new__(S, ...) -> a new object with type S, a subtype of T
    - -

    - - - - - - - -
     
    -class bool(int)
       bool(x) -> bool

    -Returns True when the argument x is true, False otherwise.
    -The builtins True and False are the only two instances of the class bool.
    -The class bool is a subclass of the class int, and cannot be subclassed.
     
     
    Method resolution order:
    -
    bool
    -
    int
    -
    object
    -
    -
    -Methods defined here:
    -
    __and__(...)
    x.__and__(y) <==> x&y
    - -
    __or__(...)
    x.__or__(y) <==> x|y
    - -
    __rand__(...)
    x.__rand__(y) <==> y&x
    - -
    __repr__(...)
    x.__repr__() <==> repr(x)
    - -
    __ror__(...)
    x.__ror__(y) <==> y|x
    - -
    __rxor__(...)
    x.__rxor__(y) <==> y^x
    - -
    __str__(...)
    x.__str__() <==> str(x)
    - -
    __xor__(...)
    x.__xor__(y) <==> x^y
    - -
    -Data and other attributes defined here:
    -
    __new__ = <built-in method __new__ of type object>
    T.__new__(S, ...) -> a new object with type S, a subtype of T
    - -
    -Methods inherited from int:
    -
    __abs__(...)
    x.__abs__() <==> abs(x)
    - -
    __add__(...)
    x.__add__(y) <==> x+y
    - -
    __cmp__(...)
    x.__cmp__(y) <==> cmp(x,y)
    - -
    __coerce__(...)
    x.__coerce__(y) <==> coerce(x, y)
    - -
    __div__(...)
    x.__div__(y) <==> x/y
    - -
    __divmod__(...)
    x.__divmod__(y) <==> divmod(x, y)
    - -
    __float__(...)
    x.__float__() <==> float(x)
    - -
    __floordiv__(...)
    x.__floordiv__(y) <==> x//y
    - -
    __getattribute__(...)
    x.__getattribute__('name') <==> x.name
    - -
    __getnewargs__(...)
    - -
    __hash__(...)
    x.__hash__() <==> hash(x)
    - -
    __hex__(...)
    x.__hex__() <==> hex(x)
    - -
    __int__(...)
    x.__int__() <==> int(x)
    - -
    __invert__(...)
    x.__invert__() <==> ~x
    - -
    __long__(...)
    x.__long__() <==> long(x)
    - -
    __lshift__(...)
    x.__lshift__(y) <==> x<<y
    - -
    __mod__(...)
    x.__mod__(y) <==> x%y
    - -
    __mul__(...)
    x.__mul__(y) <==> x*y
    - -
    __neg__(...)
    x.__neg__() <==> -x
    - -
    __nonzero__(...)
    x.__nonzero__() <==> x != 0
    - -
    __oct__(...)
    x.__oct__() <==> oct(x)
    - -
    __pos__(...)
    x.__pos__() <==> +x
    - -
    __pow__(...)
    x.__pow__(y[, z]) <==> pow(x, y[, z])
    - -
    __radd__(...)
    x.__radd__(y) <==> y+x
    - -
    __rdiv__(...)
    x.__rdiv__(y) <==> y/x
    - -
    __rdivmod__(...)
    x.__rdivmod__(y) <==> divmod(y, x)
    - -
    __rfloordiv__(...)
    x.__rfloordiv__(y) <==> y//x
    - -
    __rlshift__(...)
    x.__rlshift__(y) <==> y<<x
    - -
    __rmod__(...)
    x.__rmod__(y) <==> y%x
    - -
    __rmul__(...)
    x.__rmul__(y) <==> y*x
    - -
    __rpow__(...)
    y.__rpow__(x[, z]) <==> pow(x, y[, z])
    - -
    __rrshift__(...)
    x.__rrshift__(y) <==> y>>x
    - -
    __rshift__(...)
    x.__rshift__(y) <==> x>>y
    - -
    __rsub__(...)
    x.__rsub__(y) <==> y-x
    - -
    __rtruediv__(...)
    x.__rtruediv__(y) <==> y/x
    - -
    __sub__(...)
    x.__sub__(y) <==> x-y
    - -
    __truediv__(...)
    x.__truediv__(y) <==> x/y
    - -

    - - - - - - - -
     
    -class buffer(object)
       buffer(object [, offset[, size]])

    -Create a new buffer object which references the given object.
    -The buffer will reference a slice of the target object from the
    -start of the object (or at the specified offset). The slice will
    -extend to the end of the target object (or with the specified size).
     
     Methods defined here:
    -
    __add__(...)
    x.__add__(y) <==> x+y
    - -
    __cmp__(...)
    x.__cmp__(y) <==> cmp(x,y)
    - -
    __delitem__(...)
    x.__delitem__(y) <==> del x[y]
    - -
    __delslice__(...)
    x.__delslice__(i, j) <==> del x[i:j]

    -Use of negative indices is not supported.
    - -
    __getattribute__(...)
    x.__getattribute__('name') <==> x.name
    - -
    __getitem__(...)
    x.__getitem__(y) <==> x[y]
    - -
    __getslice__(...)
    x.__getslice__(i, j) <==> x[i:j]

    -Use of negative indices is not supported.
    - -
    __hash__(...)
    x.__hash__() <==> hash(x)
    - -
    __len__(...)
    x.__len__() <==> len(x)
    - -
    __mul__(...)
    x.__mul__(n) <==> x*n
    - -
    __repr__(...)
    x.__repr__() <==> repr(x)
    - -
    __rmul__(...)
    x.__rmul__(n) <==> n*x
    - -
    __setitem__(...)
    x.__setitem__(i, y) <==> x[i]=y
    - -
    __setslice__(...)
    x.__setslice__(i, j, y) <==> x[i:j]=y

    -Use  of negative indices is not supported.
    - -
    __str__(...)
    x.__str__() <==> str(x)
    - -
    -Data and other attributes defined here:
    -
    __new__ = <built-in method __new__ of type object>
    T.__new__(S, ...) -> a new object with type S, a subtype of T
    - -

    - - - - - - - -
     
    -class classmethod(object)
       classmethod(function) -> method

    -Convert a function to be a class method.

    -A class method receives the class as implicit first argument,
    -just like an instance method receives the instance.
    -To declare a class method, use this idiom:

    -  class C:
    -      def f(cls, arg1, arg2, ...): ...
    -      f = classmethod(f)

    -It can be called either on the class (e.g. C.f()) or on an instance
    -(e.g. C().f()).  The instance is ignored except for its class.
    -If a class method is called for a derived class, the derived class
    -object is passed as the implied first argument.

    -Class methods are different than C++ or Java static methods.
    -If you want those, see the staticmethod builtin.
     
     Methods defined here:
    -
    __get__(...)
    descr.__get__(obj[, type]) -> value
    - -
    __getattribute__(...)
    x.__getattribute__('name') <==> x.name
    - -
    __init__(...)
    x.__init__(...) initializes x; see x.__class__.__doc__ for signature
    - -
    -Data and other attributes defined here:
    -
    __new__ = <built-in method __new__ of type object>
    T.__new__(S, ...) -> a new object with type S, a subtype of T
    - -

    - - - - - - - -
     
    -class complex(object)
       complex(real[, imag]) -> complex number

    -Create a complex number from a real part and an optional imaginary part.
    -This is equivalent to (real + imag*1j) where imag defaults to 0.
     
     Methods defined here:
    -
    __abs__(...)
    x.__abs__() <==> abs(x)
    - -
    __add__(...)
    x.__add__(y) <==> x+y
    - -
    __coerce__(...)
    x.__coerce__(y) <==> coerce(x, y)
    - -
    __div__(...)
    x.__div__(y) <==> x/y
    - -
    __divmod__(...)
    x.__divmod__(y) <==> divmod(x, y)
    - -
    __eq__(...)
    x.__eq__(y) <==> x==y
    - -
    __float__(...)
    x.__float__() <==> float(x)
    - -
    __floordiv__(...)
    x.__floordiv__(y) <==> x//y
    - -
    __ge__(...)
    x.__ge__(y) <==> x>=y
    - -
    __getattribute__(...)
    x.__getattribute__('name') <==> x.name
    - -
    __getnewargs__(...)
    - -
    __gt__(...)
    x.__gt__(y) <==> x>y
    - -
    __hash__(...)
    x.__hash__() <==> hash(x)
    - -
    __int__(...)
    x.__int__() <==> int(x)
    - -
    __le__(...)
    x.__le__(y) <==> x<=y
    - -
    __long__(...)
    x.__long__() <==> long(x)
    - -
    __lt__(...)
    x.__lt__(y) <==> x<y
    - -
    __mod__(...)
    x.__mod__(y) <==> x%y
    - -
    __mul__(...)
    x.__mul__(y) <==> x*y
    - -
    __ne__(...)
    x.__ne__(y) <==> x!=y
    - -
    __neg__(...)
    x.__neg__() <==> -x
    - -
    __nonzero__(...)
    x.__nonzero__() <==> x != 0
    - -
    __pos__(...)
    x.__pos__() <==> +x
    - -
    __pow__(...)
    x.__pow__(y[, z]) <==> pow(x, y[, z])
    - -
    __radd__(...)
    x.__radd__(y) <==> y+x
    - -
    __rdiv__(...)
    x.__rdiv__(y) <==> y/x
    - -
    __rdivmod__(...)
    x.__rdivmod__(y) <==> divmod(y, x)
    - -
    __repr__(...)
    x.__repr__() <==> repr(x)
    - -
    __rfloordiv__(...)
    x.__rfloordiv__(y) <==> y//x
    - -
    __rmod__(...)
    x.__rmod__(y) <==> y%x
    - -
    __rmul__(...)
    x.__rmul__(y) <==> y*x
    - -
    __rpow__(...)
    y.__rpow__(x[, z]) <==> pow(x, y[, z])
    - -
    __rsub__(...)
    x.__rsub__(y) <==> y-x
    - -
    __rtruediv__(...)
    x.__rtruediv__(y) <==> y/x
    - -
    __str__(...)
    x.__str__() <==> str(x)
    - -
    __sub__(...)
    x.__sub__(y) <==> x-y
    - -
    __truediv__(...)
    x.__truediv__(y) <==> x/y
    - -
    conjugate(...)
    - -
    -Data and other attributes defined here:
    -
    __new__ = <built-in method __new__ of type object>
    T.__new__(S, ...) -> a new object with type S, a subtype of T
    - -
    imag = <member 'imag' of 'complex' objects>
    the imaginary part of a complex number
    - -
    real = <member 'real' of 'complex' objects>
    the real part of a complex number
    - -

    - - - - - - - -
     
    -class dict(object)
       dict() -> new empty dictionary.
    -dict(mapping) -> new dictionary initialized from a mapping object's
    -    (key, value) pairs.
    -dict(seq) -> new dictionary initialized as if via:
    -    d = {}
    -    for k, v in seq:
    -        d[k] = v
    -dict(**kwargs) -> new dictionary initialized with the name=value pairs
    -    in the keyword argument list.  For example:  dict(one=1, two=2)
     
     Methods defined here:
    -
    __cmp__(...)
    x.__cmp__(y) <==> cmp(x,y)
    - -
    __contains__(...)
    D.__contains__(k) -> True if D has a key k, else False
    - -
    __delitem__(...)
    x.__delitem__(y) <==> del x[y]
    - -
    __eq__(...)
    x.__eq__(y) <==> x==y
    - -
    __ge__(...)
    x.__ge__(y) <==> x>=y
    - -
    __getattribute__(...)
    x.__getattribute__('name') <==> x.name
    - -
    __getitem__(...)
    x.__getitem__(y) <==> x[y]
    - -
    __gt__(...)
    x.__gt__(y) <==> x>y
    - -
    __hash__(...)
    x.__hash__() <==> hash(x)
    - -
    __init__(...)
    x.__init__(...) initializes x; see x.__class__.__doc__ for signature
    - -
    __iter__(...)
    x.__iter__() <==> iter(x)
    - -
    __le__(...)
    x.__le__(y) <==> x<=y
    - -
    __len__(...)
    x.__len__() <==> len(x)
    - -
    __lt__(...)
    x.__lt__(y) <==> x<y
    - -
    __ne__(...)
    x.__ne__(y) <==> x!=y
    - -
    __repr__(...)
    x.__repr__() <==> repr(x)
    - -
    __setitem__(...)
    x.__setitem__(i, y) <==> x[i]=y
    - -
    clear(...)
    D.clear() -> None.  Remove all items from D.
    - -
    copy(...)
    D.copy() -> a shallow copy of D
    - -
    get(...)
    D.get(k[,d]) -> D[k] if k in D, else d.  d defaults to None.
    - -
    has_key(...)
    D.has_key(k) -> True if D has a key k, else False
    - -
    items(...)
    D.items() -> list of D's (key, value) pairs, as 2-tuples
    - -
    iteritems(...)
    D.iteritems() -> an iterator over the (key, value) items of D
    - -
    iterkeys(...)
    D.iterkeys() -> an iterator over the keys of D
    - -
    itervalues(...)
    D.itervalues() -> an iterator over the values of D
    - -
    keys(...)
    D.keys() -> list of D's keys
    - -
    pop(...)
    D.pop(k[,d]) -> v, remove specified key and return the corresponding value
    -If key is not found, d is returned if given, otherwise KeyError is raised
    - -
    popitem(...)
    D.popitem() -> (k, v), remove and return some (key, value) pair as a
    -2-tuple; but raise KeyError if D is empty
    - -
    setdefault(...)
    D.setdefault(k[,d]) -> D.get(k,d), also set D[k]=d if k not in D
    - -
    update(...)
    D.update(E, **F) -> None.  Update D from E and F: for k in E: D[k] = E[k]
    -(if E has keys else: for (k, v) in E: D[k] = v) then: for k in F: D[k] = F[k]
    - -
    values(...)
    D.values() -> list of D's values
    - -
    -Data and other attributes defined here:
    -
    __new__ = <built-in method __new__ of type object>
    T.__new__(S, ...) -> a new object with type S, a subtype of T
    - -
    fromkeys = <built-in method fromkeys of type object>
    dict.fromkeys(S[,v]) -> New dict with keys from S and values equal to v.
    -v defaults to None.
    - -

    - - - - - - - -
     
    -class enumerate(object)
       enumerate(iterable) -> iterator for index, value of iterable

    -Return an enumerate object.  iterable must be an other object that supports
    -iteration.  The enumerate object yields pairs containing a count (from
    -zero) and a value yielded by the iterable argument.  enumerate is useful
    -for obtaining an indexed list: (0, seq[0]), (1, seq[1]), (2, seq[2]), ...
     
     Methods defined here:
    -
    __getattribute__(...)
    x.__getattribute__('name') <==> x.name
    - -
    __iter__(...)
    x.__iter__() <==> iter(x)
    - -
    next(...)
    x.next() -> the next value, or raise StopIteration
    - -
    -Data and other attributes defined here:
    -
    __new__ = <built-in method __new__ of type object>
    T.__new__(S, ...) -> a new object with type S, a subtype of T
    - -

    - - - - - - - -
     
    -class file(object)
       file(name[, mode[, buffering]]) -> file object

    -Open a file.  The mode can be 'r', 'w' or 'a' for reading (default),
    -writing or appending.  The file will be created if it doesn't exist
    -when opened for writing or appending; it will be truncated when
    -opened for writing.  Add a 'b' to the mode for binary files.
    -Add a '+' to the mode to allow simultaneous reading and writing.
    -If the buffering argument is given, 0 means unbuffered, 1 means line
    -buffered, and larger numbers specify the buffer size.
    -Add a 'U' to mode to open the file for input with universal newline
    -support.  Any line ending in the input file will be seen as a '\n'
    -in Python.  Also, a file so opened gains the attribute 'newlines';
    -the value for this attribute is one of None (no newline read yet),
    -'\r', '\n', '\r\n' or a tuple containing all the newline types seen.

    -'U' cannot be combined with 'w' or '+' mode.

    -Note:  open() is an alias for file().
     
     Methods defined here:
    -
    __delattr__(...)
    x.__delattr__('name') <==> del x.name
    - -
    __getattribute__(...)
    x.__getattribute__('name') <==> x.name
    - -
    __init__(...)
    x.__init__(...) initializes x; see x.__class__.__doc__ for signature
    - -
    __iter__(...)
    x.__iter__() <==> iter(x)
    - -
    __repr__(...)
    x.__repr__() <==> repr(x)
    - -
    __setattr__(...)
    x.__setattr__('name', value) <==> x.name = value
    - -
    close(...)
    close() -> None or (perhaps) an integer.  Close the file.

    -Sets data attribute .closed to True.  A closed file cannot be used for
    -further I/O operations.  close() may be called more than once without
    -error.  Some kinds of file objects (for example, opened by popen())
    -may return an exit status upon closing.
    - -
    fileno(...)
    fileno() -> integer "file descriptor".

    -This is needed for lower-level file interfaces, such os.read().
    - -
    flush(...)
    flush() -> None.  Flush the internal I/O buffer.
    - -
    isatty(...)
    isatty() -> true or false.  True if the file is connected to a tty device.
    - -
    next(...)
    x.next() -> the next value, or raise StopIteration
    - -
    read(...)
    read([size]) -> read at most size bytes, returned as a string.

    -If the size argument is negative or omitted, read until EOF is reached.
    -Notice that when in non-blocking mode, less data than what was requested
    -may be returned, even if no size parameter was given.
    - -
    readinto(...)
    readinto() -> Undocumented.  Don't use this; it may go away.
    - -
    readline(...)
    readline([size]) -> next line from the file, as a string.

    -Retain newline.  A non-negative size argument limits the maximum
    -number of bytes to return (an incomplete line may be returned then).
    -Return an empty string at EOF.
    - -
    readlines(...)
    readlines([size]) -> list of strings, each a line from the file.

    -Call readline() repeatedly and return a list of the lines so read.
    -The optional size argument, if given, is an approximate bound on the
    -total number of bytes in the lines returned.
    - -
    seek(...)
    seek(offset[, whence]) -> None.  Move to new file position.

    -Argument offset is a byte count.  Optional argument whence defaults to
    -0 (offset from start of file, offset should be >= 0); other values are 1
    -(move relative to current position, positive or negative), and 2 (move
    -relative to end of file, usually negative, although many platforms allow
    -seeking beyond the end of a file).  If the file is opened in text mode,
    -only offsets returned by tell() are legal.  Use of other offsets causes
    -undefined behavior.
    -Note that not all file objects are seekable.
    - -
    tell(...)
    tell() -> current file position, an integer (may be a long integer).
    - -
    truncate(...)
    truncate([size]) -> None.  Truncate the file to at most size bytes.

    -Size defaults to the current file position, as returned by tell().
    - -
    write(...)
    write(str) -> None.  Write string str to file.

    -Note that due to buffering, flush() or close() may be needed before
    -the file on disk reflects the data written.
    - -
    writelines(...)
    writelines(sequence_of_strings) -> None.  Write the strings to the file.

    -Note that newlines are not added.  The sequence can be any iterable object
    -producing strings. This is equivalent to calling write() for each string.
    - -
    xreadlines(...)
    xreadlines() -> returns self.

    -For backward compatibility. File objects now include the performance
    -optimizations previously implemented in the xreadlines module.
    - -
    -Data and other attributes defined here:
    -
    __new__ = <built-in method __new__ of type object>
    T.__new__(S, ...) -> a new object with type S, a subtype of T
    - -
    closed = <attribute 'closed' of 'file' objects>
    True if the file is closed
    - -
    encoding = <member 'encoding' of 'file' objects>
    file encoding
    - -
    mode = <member 'mode' of 'file' objects>
    file mode ('r', 'U', 'w', 'a', possibly with 'b' or '+' added)
    - -
    name = <member 'name' of 'file' objects>
    file name
    - -
    newlines = <attribute 'newlines' of 'file' objects>
    end-of-line convention used in this file
    - -
    softspace = <member 'softspace' of 'file' objects>
    flag indicating that a space needs to be printed; used by print
    - -

    - - - - - - - -
     
    -class float(object)
       float(x) -> floating point number

    -Convert a string or number to a floating point number, if possible.
     
     Methods defined here:
    -
    __abs__(...)
    x.__abs__() <==> abs(x)
    - -
    __add__(...)
    x.__add__(y) <==> x+y
    - -
    __coerce__(...)
    x.__coerce__(y) <==> coerce(x, y)
    - -
    __div__(...)
    x.__div__(y) <==> x/y
    - -
    __divmod__(...)
    x.__divmod__(y) <==> divmod(x, y)
    - -
    __eq__(...)
    x.__eq__(y) <==> x==y
    - -
    __float__(...)
    x.__float__() <==> float(x)
    - -
    __floordiv__(...)
    x.__floordiv__(y) <==> x//y
    - -
    __ge__(...)
    x.__ge__(y) <==> x>=y
    - -
    __getattribute__(...)
    x.__getattribute__('name') <==> x.name
    - -
    __getnewargs__(...)
    - -
    __gt__(...)
    x.__gt__(y) <==> x>y
    - -
    __hash__(...)
    x.__hash__() <==> hash(x)
    - -
    __int__(...)
    x.__int__() <==> int(x)
    - -
    __le__(...)
    x.__le__(y) <==> x<=y
    - -
    __long__(...)
    x.__long__() <==> long(x)
    - -
    __lt__(...)
    x.__lt__(y) <==> x<y
    - -
    __mod__(...)
    x.__mod__(y) <==> x%y
    - -
    __mul__(...)
    x.__mul__(y) <==> x*y
    - -
    __ne__(...)
    x.__ne__(y) <==> x!=y
    - -
    __neg__(...)
    x.__neg__() <==> -x
    - -
    __nonzero__(...)
    x.__nonzero__() <==> x != 0
    - -
    __pos__(...)
    x.__pos__() <==> +x
    - -
    __pow__(...)
    x.__pow__(y[, z]) <==> pow(x, y[, z])
    - -
    __radd__(...)
    x.__radd__(y) <==> y+x
    - -
    __rdiv__(...)
    x.__rdiv__(y) <==> y/x
    - -
    __rdivmod__(...)
    x.__rdivmod__(y) <==> divmod(y, x)
    - -
    __repr__(...)
    x.__repr__() <==> repr(x)
    - -
    __rfloordiv__(...)
    x.__rfloordiv__(y) <==> y//x
    - -
    __rmod__(...)
    x.__rmod__(y) <==> y%x
    - -
    __rmul__(...)
    x.__rmul__(y) <==> y*x
    - -
    __rpow__(...)
    y.__rpow__(x[, z]) <==> pow(x, y[, z])
    - -
    __rsub__(...)
    x.__rsub__(y) <==> y-x
    - -
    __rtruediv__(...)
    x.__rtruediv__(y) <==> y/x
    - -
    __str__(...)
    x.__str__() <==> str(x)
    - -
    __sub__(...)
    x.__sub__(y) <==> x-y
    - -
    __truediv__(...)
    x.__truediv__(y) <==> x/y
    - -
    -Data and other attributes defined here:
    -
    __new__ = <built-in method __new__ of type object>
    T.__new__(S, ...) -> a new object with type S, a subtype of T
    - -

    - - - - - - - -
     
    -class frozenset(object)
       frozenset(iterable) --> frozenset object

    -Build an immutable unordered collection.
     
     Methods defined here:
    -
    __and__(...)
    x.__and__(y) <==> x&y
    - -
    __cmp__(...)
    x.__cmp__(y) <==> cmp(x,y)
    - -
    __contains__(...)
    x.__contains__(y) <==> y in x.
    - -
    __eq__(...)
    x.__eq__(y) <==> x==y
    - -
    __ge__(...)
    x.__ge__(y) <==> x>=y
    - -
    __getattribute__(...)
    x.__getattribute__('name') <==> x.name
    - -
    __gt__(...)
    x.__gt__(y) <==> x>y
    - -
    __hash__(...)
    x.__hash__() <==> hash(x)
    - -
    __iter__(...)
    x.__iter__() <==> iter(x)
    - -
    __le__(...)
    x.__le__(y) <==> x<=y
    - -
    __len__(...)
    x.__len__() <==> len(x)
    - -
    __lt__(...)
    x.__lt__(y) <==> x<y
    - -
    __ne__(...)
    x.__ne__(y) <==> x!=y
    - -
    __or__(...)
    x.__or__(y) <==> x|y
    - -
    __rand__(...)
    x.__rand__(y) <==> y&x
    - -
    __reduce__(...)
    Return state information for pickling.
    - -
    __repr__(...)
    x.__repr__() <==> repr(x)
    - -
    __ror__(...)
    x.__ror__(y) <==> y|x
    - -
    __rsub__(...)
    x.__rsub__(y) <==> y-x
    - -
    __rxor__(...)
    x.__rxor__(y) <==> y^x
    - -
    __sub__(...)
    x.__sub__(y) <==> x-y
    - -
    __xor__(...)
    x.__xor__(y) <==> x^y
    - -
    copy(...)
    Return a shallow copy of a set.
    - -
    difference(...)
    Return the difference of two sets as a new set.

    -(i.e. all elements that are in this set but not the other.)
    - -
    intersection(...)
    Return the intersection of two sets as a new set.

    -(i.e. all elements that are in both sets.)
    - -
    issubset(...)
    Report whether another set contains this set.
    - -
    issuperset(...)
    Report whether this set contains another set.
    - -
    symmetric_difference(...)
    Return the symmetric difference of two sets as a new set.

    -(i.e. all elements that are in exactly one of the sets.)
    - -
    union(...)
    Return the union of two sets as a new set.

    -(i.e. all elements that are in either set.)
    - -
    -Data and other attributes defined here:
    -
    __new__ = <built-in method __new__ of type object>
    T.__new__(S, ...) -> a new object with type S, a subtype of T
    - -

    - - - - - - - -
     
    -class int(object)
       int(x[, base]) -> integer

    -Convert a string or number to an integer, if possible.  A floating point
    -argument will be truncated towards zero (this does not include a string
    -representation of a floating point number!)  When converting a string, use
    -the optional base.  It is an error to supply a base when converting a
    -non-string. If the argument is outside the integer range a long object
    -will be returned instead.
     
     Methods defined here:
    -
    __abs__(...)
    x.__abs__() <==> abs(x)
    - -
    __add__(...)
    x.__add__(y) <==> x+y
    - -
    __and__(...)
    x.__and__(y) <==> x&y
    - -
    __cmp__(...)
    x.__cmp__(y) <==> cmp(x,y)
    - -
    __coerce__(...)
    x.__coerce__(y) <==> coerce(x, y)
    - -
    __div__(...)
    x.__div__(y) <==> x/y
    - -
    __divmod__(...)
    x.__divmod__(y) <==> divmod(x, y)
    - -
    __float__(...)
    x.__float__() <==> float(x)
    - -
    __floordiv__(...)
    x.__floordiv__(y) <==> x//y
    - -
    __getattribute__(...)
    x.__getattribute__('name') <==> x.name
    - -
    __getnewargs__(...)
    - -
    __hash__(...)
    x.__hash__() <==> hash(x)
    - -
    __hex__(...)
    x.__hex__() <==> hex(x)
    - -
    __int__(...)
    x.__int__() <==> int(x)
    - -
    __invert__(...)
    x.__invert__() <==> ~x
    - -
    __long__(...)
    x.__long__() <==> long(x)
    - -
    __lshift__(...)
    x.__lshift__(y) <==> x<<y
    - -
    __mod__(...)
    x.__mod__(y) <==> x%y
    - -
    __mul__(...)
    x.__mul__(y) <==> x*y
    - -
    __neg__(...)
    x.__neg__() <==> -x
    - -
    __nonzero__(...)
    x.__nonzero__() <==> x != 0
    - -
    __oct__(...)
    x.__oct__() <==> oct(x)
    - -
    __or__(...)
    x.__or__(y) <==> x|y
    - -
    __pos__(...)
    x.__pos__() <==> +x
    - -
    __pow__(...)
    x.__pow__(y[, z]) <==> pow(x, y[, z])
    - -
    __radd__(...)
    x.__radd__(y) <==> y+x
    - -
    __rand__(...)
    x.__rand__(y) <==> y&x
    - -
    __rdiv__(...)
    x.__rdiv__(y) <==> y/x
    - -
    __rdivmod__(...)
    x.__rdivmod__(y) <==> divmod(y, x)
    - -
    __repr__(...)
    x.__repr__() <==> repr(x)
    - -
    __rfloordiv__(...)
    x.__rfloordiv__(y) <==> y//x
    - -
    __rlshift__(...)
    x.__rlshift__(y) <==> y<<x
    - -
    __rmod__(...)
    x.__rmod__(y) <==> y%x
    - -
    __rmul__(...)
    x.__rmul__(y) <==> y*x
    - -
    __ror__(...)
    x.__ror__(y) <==> y|x
    - -
    __rpow__(...)
    y.__rpow__(x[, z]) <==> pow(x, y[, z])
    - -
    __rrshift__(...)
    x.__rrshift__(y) <==> y>>x
    - -
    __rshift__(...)
    x.__rshift__(y) <==> x>>y
    - -
    __rsub__(...)
    x.__rsub__(y) <==> y-x
    - -
    __rtruediv__(...)
    x.__rtruediv__(y) <==> y/x
    - -
    __rxor__(...)
    x.__rxor__(y) <==> y^x
    - -
    __str__(...)
    x.__str__() <==> str(x)
    - -
    __sub__(...)
    x.__sub__(y) <==> x-y
    - -
    __truediv__(...)
    x.__truediv__(y) <==> x/y
    - -
    __xor__(...)
    x.__xor__(y) <==> x^y
    - -
    -Data and other attributes defined here:
    -
    __new__ = <built-in method __new__ of type object>
    T.__new__(S, ...) -> a new object with type S, a subtype of T
    - -

    - - - - - - - -
     
    -class list(object)
       list() -> new list
    -list(sequence) -> new list initialized from sequence's items
     
     Methods defined here:
    -
    __add__(...)
    x.__add__(y) <==> x+y
    - -
    __contains__(...)
    x.__contains__(y) <==> y in x
    - -
    __delitem__(...)
    x.__delitem__(y) <==> del x[y]
    - -
    __delslice__(...)
    x.__delslice__(i, j) <==> del x[i:j]

    -Use of negative indices is not supported.
    - -
    __eq__(...)
    x.__eq__(y) <==> x==y
    - -
    __ge__(...)
    x.__ge__(y) <==> x>=y
    - -
    __getattribute__(...)
    x.__getattribute__('name') <==> x.name
    - -
    __getitem__(...)
    x.__getitem__(y) <==> x[y]
    - -
    __getslice__(...)
    x.__getslice__(i, j) <==> x[i:j]

    -Use of negative indices is not supported.
    - -
    __gt__(...)
    x.__gt__(y) <==> x>y
    - -
    __hash__(...)
    x.__hash__() <==> hash(x)
    - -
    __iadd__(...)
    x.__iadd__(y) <==> x+=y
    - -
    __imul__(...)
    x.__imul__(y) <==> x*=y
    - -
    __init__(...)
    x.__init__(...) initializes x; see x.__class__.__doc__ for signature
    - -
    __iter__(...)
    x.__iter__() <==> iter(x)
    - -
    __le__(...)
    x.__le__(y) <==> x<=y
    - -
    __len__(...)
    x.__len__() <==> len(x)
    - -
    __lt__(...)
    x.__lt__(y) <==> x<y
    - -
    __mul__(...)
    x.__mul__(n) <==> x*n
    - -
    __ne__(...)
    x.__ne__(y) <==> x!=y
    - -
    __repr__(...)
    x.__repr__() <==> repr(x)
    - -
    __reversed__(...)
    L.__reversed__() -- return a reverse iterator over the list
    - -
    __rmul__(...)
    x.__rmul__(n) <==> n*x
    - -
    __setitem__(...)
    x.__setitem__(i, y) <==> x[i]=y
    - -
    __setslice__(...)
    x.__setslice__(i, j, y) <==> x[i:j]=y

    -Use  of negative indices is not supported.
    - -
    append(...)
    L.append(object) -- append object to end
    - -
    count(...)
    L.count(value) -> integer -- return number of occurrences of value
    - -
    extend(...)
    L.extend(iterable) -- extend list by appending elements from the iterable
    - -
    index(...)
    L.index(value, [start, [stop]]) -> integer -- return first index of value
    - -
    insert(...)
    L.insert(index, object) -- insert object before index
    - -
    pop(...)
    L.pop([index]) -> item -- remove and return item at index (default last)
    - -
    remove(...)
    L.remove(value) -- remove first occurrence of value
    - -
    reverse(...)
    L.reverse() -- reverse *IN PLACE*
    - -
    sort(...)
    L.sort(cmp=None, key=None, reverse=False) -- stable sort *IN PLACE*;
    -cmp(x, y) -> -1, 0, 1
    - -
    -Data and other attributes defined here:
    -
    __new__ = <built-in method __new__ of type object>
    T.__new__(S, ...) -> a new object with type S, a subtype of T
    - -

    - - - - - - - -
     
    -class long(object)
       long(x[, base]) -> integer

    -Convert a string or number to a long integer, if possible.  A floating
    -point argument will be truncated towards zero (this does not include a
    -string representation of a floating point number!)  When converting a
    -string, use the optional base.  It is an error to supply a base when
    -converting a non-string.
     
     Methods defined here:
    -
    __abs__(...)
    x.__abs__() <==> abs(x)
    - -
    __add__(...)
    x.__add__(y) <==> x+y
    - -
    __and__(...)
    x.__and__(y) <==> x&y
    - -
    __cmp__(...)
    x.__cmp__(y) <==> cmp(x,y)
    - -
    __coerce__(...)
    x.__coerce__(y) <==> coerce(x, y)
    - -
    __div__(...)
    x.__div__(y) <==> x/y
    - -
    __divmod__(...)
    x.__divmod__(y) <==> divmod(x, y)
    - -
    __float__(...)
    x.__float__() <==> float(x)
    - -
    __floordiv__(...)
    x.__floordiv__(y) <==> x//y
    - -
    __getattribute__(...)
    x.__getattribute__('name') <==> x.name
    - -
    __getnewargs__(...)
    - -
    __hash__(...)
    x.__hash__() <==> hash(x)
    - -
    __hex__(...)
    x.__hex__() <==> hex(x)
    - -
    __int__(...)
    x.__int__() <==> int(x)
    - -
    __invert__(...)
    x.__invert__() <==> ~x
    - -
    __long__(...)
    x.__long__() <==> long(x)
    - -
    __lshift__(...)
    x.__lshift__(y) <==> x<<y
    - -
    __mod__(...)
    x.__mod__(y) <==> x%y
    - -
    __mul__(...)
    x.__mul__(y) <==> x*y
    - -
    __neg__(...)
    x.__neg__() <==> -x
    - -
    __nonzero__(...)
    x.__nonzero__() <==> x != 0
    - -
    __oct__(...)
    x.__oct__() <==> oct(x)
    - -
    __or__(...)
    x.__or__(y) <==> x|y
    - -
    __pos__(...)
    x.__pos__() <==> +x
    - -
    __pow__(...)
    x.__pow__(y[, z]) <==> pow(x, y[, z])
    - -
    __radd__(...)
    x.__radd__(y) <==> y+x
    - -
    __rand__(...)
    x.__rand__(y) <==> y&x
    - -
    __rdiv__(...)
    x.__rdiv__(y) <==> y/x
    - -
    __rdivmod__(...)
    x.__rdivmod__(y) <==> divmod(y, x)
    - -
    __repr__(...)
    x.__repr__() <==> repr(x)
    - -
    __rfloordiv__(...)
    x.__rfloordiv__(y) <==> y//x
    - -
    __rlshift__(...)
    x.__rlshift__(y) <==> y<<x
    - -
    __rmod__(...)
    x.__rmod__(y) <==> y%x
    - -
    __rmul__(...)
    x.__rmul__(y) <==> y*x
    - -
    __ror__(...)
    x.__ror__(y) <==> y|x
    - -
    __rpow__(...)
    y.__rpow__(x[, z]) <==> pow(x, y[, z])
    - -
    __rrshift__(...)
    x.__rrshift__(y) <==> y>>x
    - -
    __rshift__(...)
    x.__rshift__(y) <==> x>>y
    - -
    __rsub__(...)
    x.__rsub__(y) <==> y-x
    - -
    __rtruediv__(...)
    x.__rtruediv__(y) <==> y/x
    - -
    __rxor__(...)
    x.__rxor__(y) <==> y^x
    - -
    __str__(...)
    x.__str__() <==> str(x)
    - -
    __sub__(...)
    x.__sub__(y) <==> x-y
    - -
    __truediv__(...)
    x.__truediv__(y) <==> x/y
    - -
    __xor__(...)
    x.__xor__(y) <==> x^y
    - -
    -Data and other attributes defined here:
    -
    __new__ = <built-in method __new__ of type object>
    T.__new__(S, ...) -> a new object with type S, a subtype of T
    - -

    - - - - - - - -
     
    -class object
       The most base type
     
     

    - - - - - - - -
     
    -open = class file(object)
       file(name[, mode[, buffering]]) -> file object

    -Open a file.  The mode can be 'r', 'w' or 'a' for reading (default),
    -writing or appending.  The file will be created if it doesn't exist
    -when opened for writing or appending; it will be truncated when
    -opened for writing.  Add a 'b' to the mode for binary files.
    -Add a '+' to the mode to allow simultaneous reading and writing.
    -If the buffering argument is given, 0 means unbuffered, 1 means line
    -buffered, and larger numbers specify the buffer size.
    -Add a 'U' to mode to open the file for input with universal newline
    -support.  Any line ending in the input file will be seen as a '\n'
    -in Python.  Also, a file so opened gains the attribute 'newlines';
    -the value for this attribute is one of None (no newline read yet),
    -'\r', '\n', '\r\n' or a tuple containing all the newline types seen.

    -'U' cannot be combined with 'w' or '+' mode.

    -Note:  open() is an alias for file().
     
     Methods defined here:
    -
    __delattr__(...)
    x.__delattr__('name') <==> del x.name
    - -
    __getattribute__(...)
    x.__getattribute__('name') <==> x.name
    - -
    __init__(...)
    x.__init__(...) initializes x; see x.__class__.__doc__ for signature
    - -
    __iter__(...)
    x.__iter__() <==> iter(x)
    - -
    __repr__(...)
    x.__repr__() <==> repr(x)
    - -
    __setattr__(...)
    x.__setattr__('name', value) <==> x.name = value
    - -
    close(...)
    close() -> None or (perhaps) an integer.  Close the file.

    -Sets data attribute .closed to True.  A closed file cannot be used for
    -further I/O operations.  close() may be called more than once without
    -error.  Some kinds of file objects (for example, opened by popen())
    -may return an exit status upon closing.
    - -
    fileno(...)
    fileno() -> integer "file descriptor".

    -This is needed for lower-level file interfaces, such os.read().
    - -
    flush(...)
    flush() -> None.  Flush the internal I/O buffer.
    - -
    isatty(...)
    isatty() -> true or false.  True if the file is connected to a tty device.
    - -
    next(...)
    x.next() -> the next value, or raise StopIteration
    - -
    read(...)
    read([size]) -> read at most size bytes, returned as a string.

    -If the size argument is negative or omitted, read until EOF is reached.
    -Notice that when in non-blocking mode, less data than what was requested
    -may be returned, even if no size parameter was given.
    - -
    readinto(...)
    readinto() -> Undocumented.  Don't use this; it may go away.
    - -
    readline(...)
    readline([size]) -> next line from the file, as a string.

    -Retain newline.  A non-negative size argument limits the maximum
    -number of bytes to return (an incomplete line may be returned then).
    -Return an empty string at EOF.
    - -
    readlines(...)
    readlines([size]) -> list of strings, each a line from the file.

    -Call readline() repeatedly and return a list of the lines so read.
    -The optional size argument, if given, is an approximate bound on the
    -total number of bytes in the lines returned.
    - -
    seek(...)
    seek(offset[, whence]) -> None.  Move to new file position.

    -Argument offset is a byte count.  Optional argument whence defaults to
    -0 (offset from start of file, offset should be >= 0); other values are 1
    -(move relative to current position, positive or negative), and 2 (move
    -relative to end of file, usually negative, although many platforms allow
    -seeking beyond the end of a file).  If the file is opened in text mode,
    -only offsets returned by tell() are legal.  Use of other offsets causes
    -undefined behavior.
    -Note that not all file objects are seekable.
    - -
    tell(...)
    tell() -> current file position, an integer (may be a long integer).
    - -
    truncate(...)
    truncate([size]) -> None.  Truncate the file to at most size bytes.

    -Size defaults to the current file position, as returned by tell().
    - -
    write(...)
    write(str) -> None.  Write string str to file.

    -Note that due to buffering, flush() or close() may be needed before
    -the file on disk reflects the data written.
    - -
    writelines(...)
    writelines(sequence_of_strings) -> None.  Write the strings to the file.

    -Note that newlines are not added.  The sequence can be any iterable object
    -producing strings. This is equivalent to calling write() for each string.
    - -
    xreadlines(...)
    xreadlines() -> returns self.

    -For backward compatibility. File objects now include the performance
    -optimizations previously implemented in the xreadlines module.
    - -
    -Data and other attributes defined here:
    -
    __new__ = <built-in method __new__ of type object>
    T.__new__(S, ...) -> a new object with type S, a subtype of T
    - -
    closed = <attribute 'closed' of 'file' objects>
    True if the file is closed
    - -
    encoding = <member 'encoding' of 'file' objects>
    file encoding
    - -
    mode = <member 'mode' of 'file' objects>
    file mode ('r', 'U', 'w', 'a', possibly with 'b' or '+' added)
    - -
    name = <member 'name' of 'file' objects>
    file name
    - -
    newlines = <attribute 'newlines' of 'file' objects>
    end-of-line convention used in this file
    - -
    softspace = <member 'softspace' of 'file' objects>
    flag indicating that a space needs to be printed; used by print
    - -

    - - - - - - - -
     
    -class property(object)
       property(fget=None, fset=None, fdel=None, doc=None) -> property attribute

    -fget is a function to be used for getting an attribute value, and likewise
    -fset is a function for setting, and fdel a function for del'ing, an
    -attribute.  Typical use is to define a managed attribute x:
    -class C(object):
    -    def getx(self): return self.__x
    -    def setx(self, value): self.__x = value
    -    def delx(self): del self.__x
    -    x = property(getx, setx, delx, "I'm the 'x' property.")
     
     Methods defined here:
    -
    __delete__(...)
    descr.__delete__(obj)
    - -
    __get__(...)
    descr.__get__(obj[, type]) -> value
    - -
    __getattribute__(...)
    x.__getattribute__('name') <==> x.name
    - -
    __init__(...)
    x.__init__(...) initializes x; see x.__class__.__doc__ for signature
    - -
    __set__(...)
    descr.__set__(obj, value)
    - -
    -Data and other attributes defined here:
    -
    __new__ = <built-in method __new__ of type object>
    T.__new__(S, ...) -> a new object with type S, a subtype of T
    - -
    fdel = <member 'fdel' of 'property' objects>
    - -
    fget = <member 'fget' of 'property' objects>
    - -
    fset = <member 'fset' of 'property' objects>
    - -

    - - - - - - - -
     
    -class reversed(object)
       reversed(sequence) -> reverse iterator over values of the sequence

    -Return a reverse iterator
     
     Methods defined here:
    -
    __getattribute__(...)
    x.__getattribute__('name') <==> x.name
    - -
    __iter__(...)
    x.__iter__() <==> iter(x)
    - -
    __len__(...)
    x.__len__() <==> len(x)
    - -
    next(...)
    x.next() -> the next value, or raise StopIteration
    - -
    -Data and other attributes defined here:
    -
    __new__ = <built-in method __new__ of type object>
    T.__new__(S, ...) -> a new object with type S, a subtype of T
    - -

    - - - - - - - -
     
    -class set(object)
       set(iterable) --> set object

    -Build an unordered collection.
     
     Methods defined here:
    -
    __and__(...)
    x.__and__(y) <==> x&y
    - -
    __cmp__(...)
    x.__cmp__(y) <==> cmp(x,y)
    - -
    __contains__(...)
    x.__contains__(y) <==> y in x.
    - -
    __eq__(...)
    x.__eq__(y) <==> x==y
    - -
    __ge__(...)
    x.__ge__(y) <==> x>=y
    - -
    __getattribute__(...)
    x.__getattribute__('name') <==> x.name
    - -
    __gt__(...)
    x.__gt__(y) <==> x>y
    - -
    __hash__(...)
    x.__hash__() <==> hash(x)
    - -
    __iand__(...)
    x.__iand__(y) <==> x&y
    - -
    __init__(...)
    x.__init__(...) initializes x; see x.__class__.__doc__ for signature
    - -
    __ior__(...)
    x.__ior__(y) <==> x|y
    - -
    __isub__(...)
    x.__isub__(y) <==> x-y
    - -
    __iter__(...)
    x.__iter__() <==> iter(x)
    - -
    __ixor__(...)
    x.__ixor__(y) <==> x^y
    - -
    __le__(...)
    x.__le__(y) <==> x<=y
    - -
    __len__(...)
    x.__len__() <==> len(x)
    - -
    __lt__(...)
    x.__lt__(y) <==> x<y
    - -
    __ne__(...)
    x.__ne__(y) <==> x!=y
    - -
    __or__(...)
    x.__or__(y) <==> x|y
    - -
    __rand__(...)
    x.__rand__(y) <==> y&x
    - -
    __reduce__(...)
    Return state information for pickling.
    - -
    __repr__(...)
    x.__repr__() <==> repr(x)
    - -
    __ror__(...)
    x.__ror__(y) <==> y|x
    - -
    __rsub__(...)
    x.__rsub__(y) <==> y-x
    - -
    __rxor__(...)
    x.__rxor__(y) <==> y^x
    - -
    __sub__(...)
    x.__sub__(y) <==> x-y
    - -
    __xor__(...)
    x.__xor__(y) <==> x^y
    - -
    add(...)
    Add an element to a set.

    -This has no effect if the element is already present.
    - -
    clear(...)
    Remove all elements from this set.
    - -
    copy(...)
    Return a shallow copy of a set.
    - -
    difference(...)
    Return the difference of two sets as a new set.

    -(i.e. all elements that are in this set but not the other.)
    - -
    difference_update(...)
    Remove all elements of another set from this set.
    - -
    discard(...)
    Remove an element from a set if it is a member.

    -If the element is not a member, do nothing.
    - -
    intersection(...)
    Return the intersection of two sets as a new set.

    -(i.e. all elements that are in both sets.)
    - -
    intersection_update(...)
    Update a set with the intersection of itself and another.
    - -
    issubset(...)
    Report whether another set contains this set.
    - -
    issuperset(...)
    Report whether this set contains another set.
    - -
    pop(...)
    Remove and return an arbitrary set element.
    - -
    remove(...)
    Remove an element from a set; it must be a member.

    -If the element is not a member, raise a KeyError.
    - -
    symmetric_difference(...)
    Return the symmetric difference of two sets as a new set.

    -(i.e. all elements that are in exactly one of the sets.)
    - -
    symmetric_difference_update(...)
    Update a set with the symmetric difference of itself and another.
    - -
    union(...)
    Return the union of two sets as a new set.

    -(i.e. all elements that are in either set.)
    - -
    update(...)
    Update a set with the union of itself and another.
    - -
    -Data and other attributes defined here:
    -
    __new__ = <built-in method __new__ of type object>
    T.__new__(S, ...) -> a new object with type S, a subtype of T
    - -

    - - - - - - - -
     
    -class slice(object)
       slice([start,] stop[, step])

    -Create a slice object.  This is used for extended slicing (e.g. a[0:10:2]).
     
     Methods defined here:
    -
    __cmp__(...)
    x.__cmp__(y) <==> cmp(x,y)
    - -
    __getattribute__(...)
    x.__getattribute__('name') <==> x.name
    - -
    __hash__(...)
    x.__hash__() <==> hash(x)
    - -
    __repr__(...)
    x.__repr__() <==> repr(x)
    - -
    indices(...)
    S.indices(len) -> (start, stop, stride)

    -Assuming a sequence of length len, calculate the start and stop
    -indices, and the stride length of the extended slice described by
    -S. Out of bounds indices are clipped in a manner consistent with the
    -handling of normal slices.
    - -
    -Data and other attributes defined here:
    -
    __new__ = <built-in method __new__ of type object>
    T.__new__(S, ...) -> a new object with type S, a subtype of T
    - -
    start = <member 'start' of 'slice' objects>
    - -
    step = <member 'step' of 'slice' objects>
    - -
    stop = <member 'stop' of 'slice' objects>
    - -

    - - - - - - - -
     
    -class staticmethod(object)
       staticmethod(function) -> method

    -Convert a function to be a static method.

    -A static method does not receive an implicit first argument.
    -To declare a static method, use this idiom:

    -     class C:
    -         def f(arg1, arg2, ...): ...
    -         f = staticmethod(f)

    -It can be called either on the class (e.g. C.f()) or on an instance
    -(e.g. C().f()).  The instance is ignored except for its class.

    -Static methods in Python are similar to those found in Java or C++.
    -For a more advanced concept, see the classmethod builtin.
     
     Methods defined here:
    -
    __get__(...)
    descr.__get__(obj[, type]) -> value
    - -
    __getattribute__(...)
    x.__getattribute__('name') <==> x.name
    - -
    __init__(...)
    x.__init__(...) initializes x; see x.__class__.__doc__ for signature
    - -
    -Data and other attributes defined here:
    -
    __new__ = <built-in method __new__ of type object>
    T.__new__(S, ...) -> a new object with type S, a subtype of T
    - -

    - - - - - - - -
     
    -class str(basestring)
       str(object) -> string

    -Return a nice string representation of the object.
    -If the argument is a string, the return value is the same object.
     
     
    Method resolution order:
    -
    str
    -
    basestring
    -
    object
    -
    -
    -Methods defined here:
    -
    __add__(...)
    x.__add__(y) <==> x+y
    - -
    __contains__(...)
    x.__contains__(y) <==> y in x
    - -
    __eq__(...)
    x.__eq__(y) <==> x==y
    - -
    __ge__(...)
    x.__ge__(y) <==> x>=y
    - -
    __getattribute__(...)
    x.__getattribute__('name') <==> x.name
    - -
    __getitem__(...)
    x.__getitem__(y) <==> x[y]
    - -
    __getnewargs__(...)
    - -
    __getslice__(...)
    x.__getslice__(i, j) <==> x[i:j]

    -Use of negative indices is not supported.
    - -
    __gt__(...)
    x.__gt__(y) <==> x>y
    - -
    __hash__(...)
    x.__hash__() <==> hash(x)
    - -
    __le__(...)
    x.__le__(y) <==> x<=y
    - -
    __len__(...)
    x.__len__() <==> len(x)
    - -
    __lt__(...)
    x.__lt__(y) <==> x<y
    - -
    __mod__(...)
    x.__mod__(y) <==> x%y
    - -
    __mul__(...)
    x.__mul__(n) <==> x*n
    - -
    __ne__(...)
    x.__ne__(y) <==> x!=y
    - -
    __repr__(...)
    x.__repr__() <==> repr(x)
    - -
    __rmod__(...)
    x.__rmod__(y) <==> y%x
    - -
    __rmul__(...)
    x.__rmul__(n) <==> n*x
    - -
    __str__(...)
    x.__str__() <==> str(x)
    - -
    capitalize(...)
    S.capitalize() -> string

    -Return a copy of the string S with only its first character
    -capitalized.
    - -
    center(...)
    S.center(width[, fillchar]) -> string

    -Return S centered in a string of length width. Padding is
    -done using the specified fill character (default is a space)
    - -
    count(...)
    S.count(sub[, start[, end]]) -> int

    -Return the number of occurrences of substring sub in string
    -S[start:end].  Optional arguments start and end are
    -interpreted as in slice notation.
    - -
    decode(...)
    S.decode([encoding[,errors]]) -> object

    -Decodes S using the codec registered for encoding. encoding defaults
    -to the default encoding. errors may be given to set a different error
    -handling scheme. Default is 'strict' meaning that encoding errors raise
    -a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
    -as well as any other name registered with codecs.register_error that is
    -able to handle UnicodeDecodeErrors.
    - -
    encode(...)
    S.encode([encoding[,errors]]) -> object

    -Encodes S using the codec registered for encoding. encoding defaults
    -to the default encoding. errors may be given to set a different error
    -handling scheme. Default is 'strict' meaning that encoding errors raise
    -a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and
    -'xmlcharrefreplace' as well as any other name registered with
    -codecs.register_error that is able to handle UnicodeEncodeErrors.
    - -
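For illustration, a round trip through decode() and encode() (a sketch assuming the standard UTF-8 codec):

    data = 'caf\xc3\xa9'                 # UTF-8 encoded byte string
    text = data.decode('utf-8')          # -> u'caf\xe9'
    print text.encode('utf-8') == data   # -> True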
    endswith(...)
    S.endswith(suffix[, start[, end]]) -> bool

    -Return True if S ends with the specified suffix, False otherwise.
    -With optional start, test S beginning at that position.
    -With optional end, stop comparing S at that position.
    - -
    expandtabs(...)
    S.expandtabs([tabsize]) -> string

    -Return a copy of S where all tab characters are expanded using spaces.
    -If tabsize is not given, a tab size of 8 characters is assumed.
    - -
    find(...)
    S.find(sub [,start [,end]]) -> int

    -Return the lowest index in S where substring sub is found,
    -such that sub is contained within s[start,end].  Optional
    -arguments start and end are interpreted as in slice notation.

    -Return -1 on failure.
    - -
    index(...)
    S.index(sub [,start [,end]]) -> int

    -Like S.find() but raise ValueError when the substring is not found.
    - -
    isalnum(...)
    S.isalnum() -> bool

    -Return True if all characters in S are alphanumeric
    -and there is at least one character in S, False otherwise.
    - -
    isalpha(...)
    S.isalpha() -> bool

    -Return True if all characters in S are alphabetic
    -and there is at least one character in S, False otherwise.
    - -
    isdigit(...)
    S.isdigit() -> bool

    -Return True if all characters in S are digits
    -and there is at least one character in S, False otherwise.
    - -
    islower(...)
    S.islower() -> bool

    -Return True if all cased characters in S are lowercase and there is
    -at least one cased character in S, False otherwise.
    - -
    isspace(...)
    S.isspace() -> bool

    -Return True if all characters in S are whitespace
    -and there is at least one character in S, False otherwise.
    - -
    istitle(...)
    S.istitle() -> bool

    -Return True if S is a titlecased string and there is at least one
    -character in S, i.e. uppercase characters may only follow uncased
    -characters and lowercase characters only cased ones. Return False
    -otherwise.
    - -
    isupper(...)
    S.isupper() -> bool

    -Return True if all cased characters in S are uppercase and there is
    -at least one cased character in S, False otherwise.
    - -
    join(...)
    S.join(sequence) -> string

    -Return a string which is the concatenation of the strings in the
    -sequence.  The separator between elements is S.
    - -
    ljust(...)
    S.ljust(width[, fillchar]) -> string

    -Return S left justified in a string of length width. Padding is
    -done using the specified fill character (default is a space).
    - -
    lower(...)
    S.lower() -> string

    -Return a copy of the string S converted to lowercase.
    - -
    lstrip(...)
    S.lstrip([chars]) -> string or unicode

    -Return a copy of the string S with leading whitespace removed.
    -If chars is given and not None, remove characters in chars instead.
    -If chars is unicode, S will be converted to unicode before stripping
    - -
    replace(...)
    S.replace (old, new[, count]) -> string

    -Return a copy of string S with all occurrences of substring
    -old replaced by new.  If the optional argument count is
    -given, only the first count occurrences are replaced.
    - -
    rfind(...)
    S.rfind(sub [,start [,end]]) -> int

    -Return the highest index in S where substring sub is found,
    -such that sub is contained within s[start,end].  Optional
    -arguments start and end are interpreted as in slice notation.

    -Return -1 on failure.
    - -
    rindex(...)
    S.rindex(sub [,start [,end]]) -> int

    -Like S.rfind() but raise ValueError when the substring is not found.
    - -
    rjust(...)
    S.rjust(width[, fillchar]) -> string

    -Return S right justified in a string of length width. Padding is
    -done using the specified fill character (default is a space)
    - -
    rsplit(...)
    S.rsplit([sep [,maxsplit]]) -> list of strings

    -Return a list of the words in the string S, using sep as the
    -delimiter string, starting at the end of the string and working
    -to the front.  If maxsplit is given, at most maxsplit splits are
    -done. If sep is not specified or is None, any whitespace string
    -is a separator.
    - -
    rstrip(...)
    S.rstrip([chars]) -> string or unicode

    -Return a copy of the string S with trailing whitespace removed.
    -If chars is given and not None, remove characters in chars instead.
    -If chars is unicode, S will be converted to unicode before stripping
    - -
    split(...)
    S.split([sep [,maxsplit]]) -> list of strings

    -Return a list of the words in the string S, using sep as the
    -delimiter string.  If maxsplit is given, at most maxsplit
    -splits are done. If sep is not specified or is None, any
    -whitespace string is a separator.
    - -
    splitlines(...)
    S.splitlines([keepends]) -> list of strings

    -Return a list of the lines in S, breaking at line boundaries.
    -Line breaks are not included in the resulting list unless keepends
    -is given and true.
    - -
    startswith(...)
    S.startswith(prefix[, start[, end]]) -> bool

    -Return True if S starts with the specified prefix, False otherwise.
    -With optional start, test S beginning at that position.
    -With optional end, stop comparing S at that position.
    - -
    strip(...)
    S.strip([chars]) -> string or unicode

    -Return a copy of the string S with leading and trailing
    -whitespace removed.
    -If chars is given and not None, remove characters in chars instead.
    -If chars is unicode, S will be converted to unicode before stripping
    - -
    swapcase(...)
    S.swapcase() -> string

    -Return a copy of the string S with uppercase characters
    -converted to lowercase and vice versa.
    - -
    title(...)
    S.title() -> string

    -Return a titlecased version of S, i.e. words start with uppercase
    -characters, all remaining cased characters have lowercase.
    - -
    translate(...)
    S.translate(table [,deletechars]) -> string

    -Return a copy of the string S, where all characters occurring
    -in the optional argument deletechars are removed, and the
    -remaining characters have been mapped through the given
    -translation table, which must be a string of length 256.
    - -
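For illustration, a sketch of building the 256-character table with string.maketrans and using deletechars:

    import string
    table = string.maketrans('abc', 'xyz')
    print 'abcabc'.translate(table)        # -> 'xyzxyz'
    print 'a-b-c'.translate(table, '-')    # -> 'xyz'; the '-' characters are deleted first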
    upper(...)
    S.upper() -> string

    -Return a copy of the string S converted to uppercase.
    - -
    zfill(...)
    S.zfill(width) -> string

    -Pad a numeric string S with zeros on the left, to fill a field
    -of the specified width.  The string S is never truncated.
    - -
    -Data and other attributes defined here:
    -
    __new__ = <built-in method __new__ of type object>
    T.__new__(S, ...) -> a new object with type S, a subtype of T
    - -

    - - - - - - - -
     
    -class super(object)
       super(type) -> unbound super object
    -super(type, obj) -> bound super object; requires isinstance(obj, type)
    -super(type, type2) -> bound super object; requires issubclass(type2, type)
    -Typical use to call a cooperative superclass method:
    -class C(B):
    -    def meth(self, arg):
    -        super(C, self).meth(arg)
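For illustration, the cooperative pattern above fleshed out (a minimal sketch; the classes are made up):

    class B(object):
        def meth(self, arg):
            print 'B.meth', arg

    class C(B):
        def meth(self, arg):
            super(C, self).meth(arg)    # delegates to the next class in the MRO
            print 'C.meth', arg

    C().meth(42)    # prints 'B.meth 42' then 'C.meth 42'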
     
     Methods defined here:
    -
    __get__(...)
    descr.__get__(obj[, type]) -> value
    - -
    __getattribute__(...)
    x.__getattribute__('name') <==> x.name
    - -
    __init__(...)
    x.__init__(...) initializes x; see x.__class__.__doc__ for signature
    - -
    __repr__(...)
    x.__repr__() <==> repr(x)
    - -
    -Data and other attributes defined here:
    -
    __new__ = <built-in method __new__ of type object>
    T.__new__(S, ...) -> a new object with type S, a subtype of T
    - -
    __self__ = <member '__self__' of 'super' objects>
    the instance invoking super(); may be None
    - -
    __self_class__ = <member '__self_class__' of 'super' objects>
    the type of the instance invoking super(); may be None
    - -
    __thisclass__ = <member '__thisclass__' of 'super' objects>
    the class invoking super()
    - -

    - - - - - - - -
     
    -class tuple(object)
       tuple() -> an empty tuple
    -tuple(sequence) -> tuple initialized from sequence's items

    -If the argument is a tuple, the return value is the same object.
     
     Methods defined here:
    -
    __add__(...)
    x.__add__(y) <==> x+y
    - -
    __contains__(...)
    x.__contains__(y) <==> y in x
    - -
    __eq__(...)
    x.__eq__(y) <==> x==y
    - -
    __ge__(...)
    x.__ge__(y) <==> x>=y
    - -
    __getattribute__(...)
    x.__getattribute__('name') <==> x.name
    - -
    __getitem__(...)
    x.__getitem__(y) <==> x[y]
    - -
    __getnewargs__(...)
    - -
    __getslice__(...)
    x.__getslice__(i, j) <==> x[i:j]

    -Use of negative indices is not supported.
    - -
    __gt__(...)
    x.__gt__(y) <==> x>y
    - -
    __hash__(...)
    x.__hash__() <==> hash(x)
    - -
    __iter__(...)
    x.__iter__() <==> iter(x)
    - -
    __le__(...)
    x.__le__(y) <==> x<=y
    - -
    __len__(...)
    x.__len__() <==> len(x)
    - -
    __lt__(...)
    x.__lt__(y) <==> x<y
    - -
    __mul__(...)
    x.__mul__(n) <==> x*n
    - -
    __ne__(...)
    x.__ne__(y) <==> x!=y
    - -
    __repr__(...)
    x.__repr__() <==> repr(x)
    - -
    __rmul__(...)
    x.__rmul__(n) <==> n*x
    - -
    -Data and other attributes defined here:
    -
    __new__ = <built-in method __new__ of type object>
    T.__new__(S, ...) -> a new object with type S, a subtype of T
    - -

    - - - - - - - -
     
    -class type(object)
       type(object) -> the object's type
    -type(name, bases, dict) -> a new type
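For illustration, both forms in a short sketch (the class name is made up):

    Point = type('Point', (object,), {'x': 0, 'y': 0})   # three-argument form: build a class
    p = Point()
    print type(p) is Point       # -> True (one-argument form: the object's type)
    print p.x, p.y               # -> 0 0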
     
     Methods defined here:
    -
    __call__(...)
    x.__call__(...) <==> x(...)
    - -
    __cmp__(...)
    x.__cmp__(y) <==> cmp(x,y)
    - -
    __delattr__(...)
    x.__delattr__('name') <==> del x.name
    - -
    __getattribute__(...)
    x.__getattribute__('name') <==> x.name
    - -
    __hash__(...)
    x.__hash__() <==> hash(x)
    - -
    __repr__(...)
    x.__repr__() <==> repr(x)
    - -
    __setattr__(...)
    x.__setattr__('name', value) <==> x.name = value
    - -
    __subclasses__(...)
    __subclasses__() -> list of immediate subclasses
    - -
    mro(...)
    mro() -> list
    -return a type's method resolution order
    - -
    -Data and other attributes defined here:
    -
    __base__ = <type 'object'>
    - -
    __bases__ = (<type 'object'>,)
    - -
    __basicsize__ = 832
    - -
    __dict__ = <dictproxy object>
    - -
    __dictoffset__ = 256
    - -
    __flags__ = 21995
    - -
    __itemsize__ = 32
    - -
    __mro__ = (<type 'type'>, <type 'object'>)
    - -
    __new__ = <built-in method __new__ of type object>
    T.__new__(S, ...) -> a new object with type S, a subtype of T
    - -
    __weakrefoffset__ = 360
    - -

    - - - - - - - -
     
    -class unicode(basestring)
       unicode(string [, encoding[, errors]]) -> object

    -Create a new Unicode object from the given encoded string.
    -encoding defaults to the current default string encoding.
    -errors can be 'strict', 'replace' or 'ignore' and defaults to 'strict'.
     
     
    Method resolution order:
    -
    unicode
    -
    basestring
    -
    object
    -
    -
    -Methods defined here:
    -
    __add__(...)
    x.__add__(y) <==> x+y
    - -
    __cmp__(...)
    x.__cmp__(y) <==> cmp(x,y)
    - -
    __contains__(...)
    x.__contains__(y) <==> y in x
    - -
    __getattribute__(...)
    x.__getattribute__('name') <==> x.name
    - -
    __getitem__(...)
    x.__getitem__(y) <==> x[y]
    - -
    __getnewargs__(...)
    - -
    __getslice__(...)
    x.__getslice__(i, j) <==> x[i:j]

    -Use of negative indices is not supported.
    - -
    __hash__(...)
    x.__hash__() <==> hash(x)
    - -
    __len__(...)
    x.__len__() <==> len(x)
    - -
    __mod__(...)
    x.__mod__(y) <==> x%y
    - -
    __mul__(...)
    x.__mul__(n) <==> x*n
    - -
    __repr__(...)
    x.__repr__() <==> repr(x)
    - -
    __rmod__(...)
    x.__rmod__(y) <==> y%x
    - -
    __rmul__(...)
    x.__rmul__(n) <==> n*x
    - -
    __str__(...)
    x.__str__() <==> str(x)
    - -
    capitalize(...)
    S.capitalize() -> unicode

    -Return a capitalized version of S, i.e. make the first character
    -have upper case.
    - -
    center(...)
    S.center(width[, fillchar]) -> unicode

    -Return S centered in a Unicode string of length width. Padding is
    -done using the specified fill character (default is a space)
    - -
    count(...)
    S.count(sub[, start[, end]]) -> int

    -Return the number of occurrences of substring sub in Unicode string
    -S[start:end].  Optional arguments start and end are
    -interpreted as in slice notation.
    - -
    decode(...)
    S.decode([encoding[,errors]]) -> string or unicode

    -Decodes S using the codec registered for encoding. encoding defaults
    -to the default encoding. errors may be given to set a different error
    -handling scheme. Default is 'strict' meaning that encoding errors raise
    -a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
    -as well as any other name registered with codecs.register_error that is
    -able to handle UnicodeDecodeErrors.
    - -
    encode(...)
    S.encode([encoding[,errors]]) -> string or unicode

    -Encodes S using the codec registered for encoding. encoding defaults
    -to the default encoding. errors may be given to set a different error
    -handling scheme. Default is 'strict' meaning that encoding errors raise
    -a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and
    -'xmlcharrefreplace' as well as any other name registered with
    -codecs.register_error that can handle UnicodeEncodeErrors.
    - -
    endswith(...)
    S.endswith(suffix[, start[, end]]) -> bool

    -Return True if S ends with the specified suffix, False otherwise.
    -With optional start, test S beginning at that position.
    -With optional end, stop comparing S at that position.
    - -
    expandtabs(...)
    S.expandtabs([tabsize]) -> unicode

    -Return a copy of S where all tab characters are expanded using spaces.
    -If tabsize is not given, a tab size of 8 characters is assumed.
    - -
    find(...)
    S.find(sub [,start [,end]]) -> int

    -Return the lowest index in S where substring sub is found,
    -such that sub is contained within s[start,end].  Optional
    -arguments start and end are interpreted as in slice notation.

    -Return -1 on failure.
    - -
    index(...)
    S.index(sub [,start [,end]]) -> int

    -Like S.find() but raise ValueError when the substring is not found.
    - -
    isalnum(...)
    S.isalnum() -> bool

    -Return True if all characters in S are alphanumeric
    -and there is at least one character in S, False otherwise.
    - -
    isalpha(...)
    S.isalpha() -> bool

    -Return True if all characters in S are alphabetic
    -and there is at least one character in S, False otherwise.
    - -
    isdecimal(...)
    S.isdecimal() -> bool

    -Return True if there are only decimal characters in S,
    -False otherwise.
    - -
    isdigit(...)
    S.isdigit() -> bool

    -Return True if all characters in S are digits
    -and there is at least one character in S, False otherwise.
    - -
    islower(...)
    S.islower() -> bool

    -Return True if all cased characters in S are lowercase and there is
    -at least one cased character in S, False otherwise.
    - -
    isnumeric(...)
    S.isnumeric() -> bool

    -Return True if there are only numeric characters in S,
    -False otherwise.
    - -
    isspace(...)
    S.isspace() -> bool

    -Return True if all characters in S are whitespace
    -and there is at least one character in S, False otherwise.
    - -
    istitle(...)
    S.istitle() -> bool

    -Return True if S is a titlecased string and there is at least one
    -character in S, i.e. upper- and titlecase characters may only
    -follow uncased characters and lowercase characters only cased ones.
    -Return False otherwise.
    - -
    isupper(...)
    S.isupper() -> bool

    -Return True if all cased characters in S are uppercase and there is
    -at least one cased character in S, False otherwise.
    - -
    join(...)
    S.join(sequence) -> unicode

    -Return a string which is the concatenation of the strings in the
    -sequence.  The separator between elements is S.
    - -
    ljust(...)
    S.ljust(width[, fillchar]) -> unicode

    -Return S left justified in a Unicode string of length width. Padding is
    -done using the specified fill character (default is a space).
    - -
    lower(...)
    S.lower() -> unicode

    -Return a copy of the string S converted to lowercase.
    - -
    lstrip(...)
    S.lstrip([chars]) -> unicode

    -Return a copy of the string S with leading whitespace removed.
    -If chars is given and not None, remove characters in chars instead.
    -If chars is a str, it will be converted to unicode before stripping
    - -
    replace(...)
    S.replace (old, new[, count]) -> unicode

    -Return a copy of S with all occurrences of substring
    -old replaced by new.  If the optional argument count is
    -given, only the first count occurrences are replaced.
    - -
    rfind(...)
    S.rfind(sub [,start [,end]]) -> int

    -Return the highest index in S where substring sub is found,
    -such that sub is contained within s[start,end].  Optional
    -arguments start and end are interpreted as in slice notation.

    -Return -1 on failure.
    - -
    rindex(...)
    S.rindex(sub [,start [,end]]) -> int

    -Like S.rfind() but raise ValueError when the substring is not found.
    - -
    rjust(...)
    S.rjust(width[, fillchar]) -> unicode

    -Return S right justified in a Unicode string of length width. Padding is
    -done using the specified fill character (default is a space).
    - -
    rsplit(...)
    S.rsplit([sep [,maxsplit]]) -> list of strings

    -Return a list of the words in S, using sep as the
    -delimiter string, starting at the end of the string and
    -working to the front.  If maxsplit is given, at most maxsplit
    -splits are done. If sep is not specified, any whitespace string
    -is a separator.
    - -
    rstrip(...)
    S.rstrip([chars]) -> unicode

    -Return a copy of the string S with trailing whitespace removed.
    -If chars is given and not None, remove characters in chars instead.
    -If chars is a str, it will be converted to unicode before stripping
    - -
    split(...)
    S.split([sep [,maxsplit]]) -> list of strings

    -Return a list of the words in S, using sep as the
    -delimiter string.  If maxsplit is given, at most maxsplit
    -splits are done. If sep is not specified or is None,
    -any whitespace string is a separator.
    - -
    splitlines(...)
    S.splitlines([keepends]) -> list of strings

    -Return a list of the lines in S, breaking at line boundaries.
    -Line breaks are not included in the resulting list unless keepends
    -is given and true.
    - -
    startswith(...)
    S.startswith(prefix[, start[, end]]) -> bool

    -Return True if S starts with the specified prefix, False otherwise.
    -With optional start, test S beginning at that position.
    -With optional end, stop comparing S at that position.
    - -
    strip(...)
    S.strip([chars]) -> unicode

    -Return a copy of the string S with leading and trailing
    -whitespace removed.
    -If chars is given and not None, remove characters in chars instead.
    -If chars is a str, it will be converted to unicode before stripping
    - -
    swapcase(...)
    S.swapcase() -> unicode

    -Return a copy of S with uppercase characters converted to lowercase
    -and vice versa.
    - -
    title(...)
    S.title() -> unicode

    -Return a titlecased version of S, i.e. words start with title case
    -characters, all remaining cased characters have lower case.
    - -
    translate(...)
    S.translate(table) -> unicode

    -Return a copy of the string S, where all characters have been mapped
    -through the given translation table, which must be a mapping of
    -Unicode ordinals to Unicode ordinals, Unicode strings or None.
    -Unmapped characters are left untouched. Characters mapped to None
    -are deleted.
    - -
    upper(...)
    S.upper() -> unicode

    -Return a copy of S converted to uppercase.
    - -
    zfill(...)
    S.zfill(width) -> unicode

    -Pad a numeric string x with zeros on the left, to fill a field
    -of the specified width. The string x is never truncated.
    - -
    -Data and other attributes defined here:
    -
    __new__ = <built-in method __new__ of type object>
    T.__new__(S, ...) -> a new object with type S, a subtype of T
    - -

    - - - - - - - -
     
    -class xrange(object)
       xrange([start,] stop[, step]) -> xrange object

    -Like range(), but instead of returning a list, returns an object that
    -generates the numbers in the range on demand.  For looping, this is 
    -slightly faster than range() and more memory efficient.
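For illustration, a typical loop (a sketch; the bound is arbitrary):

    total = 0
    for i in xrange(1000000):    # no million-element list is built in memory
        total += i
    print total                  # -> 499999500000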
     
     Methods defined here:
    -
    __getattribute__(...)
    x.__getattribute__('name') <==> x.name
    - -
    __getitem__(...)
    x.__getitem__(y) <==> x[y]
    - -
    __iter__(...)
    x.__iter__() <==> iter(x)
    - -
    __len__(...)
    x.__len__() <==> len(x)
    - -
    __repr__(...)
    x.__repr__() <==> repr(x)
    - -
    __reversed__(...)
    Returns a reverse iterator.
    - -
    -Data and other attributes defined here:
    -
    __new__ = <built-in method __new__ of type object>
    T.__new__(S, ...) -> a new object with type S, a subtype of T
    - -

    - - - - - -
     
    -Functions
           
    __import__(...)
    __import__(name, globals, locals, fromlist) -> module

    -Import a module.  The globals are only used to determine the context;
    -they are not modified.  The locals are currently unused.  The fromlist
    -should be a list of names to emulate ``from name import ...'', or an
    -empty list to emulate ``import name''.
    -When importing a module from a package, note that __import__('A.B', ...)
    -returns package A when fromlist is empty, but its submodule B when
    -fromlist is not empty.
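For illustration, the fromlist behaviour with a package (a sketch assuming simpleparse is installed):

    pkg = __import__('simpleparse.parser', {}, {}, [])          # empty fromlist -> package
    mod = __import__('simpleparse.parser', {}, {}, ['Parser'])  # non-empty fromlist -> submodule
    print pkg.__name__    # -> 'simpleparse'
    print mod.__name__    # -> 'simpleparse.parser'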
    -
    abs(...)
    abs(number) -> number

    -Return the absolute value of the argument.
    -
    apply(...)
    apply(object[, args[, kwargs]]) -> value

    -Call a callable object with positional arguments taken from the tuple args,
    -and keyword arguments taken from the optional dictionary kwargs.
    -Note that classes are callable, as are instances with a __call__() method.

    -Deprecated since release 2.3. Instead, use the extended call syntax:
    -    function(*args, **keywords).
    -
    callable(...)
    callable(object) -> bool

    -Return whether the object is callable (i.e., some kind of function).
    -Note that classes are callable, as are instances with a __call__() method.
    -
    chr(...)
    chr(i) -> character

    -Return a string of one character with ordinal i; 0 <= i < 256.
    -
    cmp(...)
    cmp(x, y) -> integer

    -Return negative if x<y, zero if x==y, positive if x>y.
    -
    coerce(...)
    coerce(x, y) -> (x1, y1)

    -Return a tuple consisting of the two numeric arguments converted to
    -a common type, using the same rules as used by arithmetic operations.
    -If coercion is not possible, raise TypeError.
    -
    compile(...)
    compile(source, filename, mode[, flags[, dont_inherit]]) -> code object

    -Compile the source string (a Python module, statement or expression)
    -into a code object that can be executed by the exec statement or eval().
    -The filename will be used for run-time error messages.
    -The mode must be 'exec' to compile a module, 'single' to compile a
    -single (interactive) statement, or 'eval' to compile an expression.
    -The flags argument, if present, controls which future statements influence
    -the compilation of the code.
    -The dont_inherit argument, if non-zero, stops the compilation inheriting
    -the effects of any future statements in effect in the code calling
    -compile; if absent or zero these statements do influence the compilation,
    -in addition to any features explicitly specified.
    -
    delattr(...)
    delattr(object, name)

    -Delete a named attribute on an object; delattr(x, 'y') is equivalent to
    -``del x.y''.
    -
    dir(...)
    dir([object]) -> list of strings

    -Return an alphabetized list of names comprising (some of) the attributes
    -of the given object, and of attributes reachable from it:

    -No argument:  the names in the current scope.
    -Module object:  the module attributes.
    -Type or class object:  its attributes, and recursively the attributes of
    -    its bases.
    -Otherwise:  its attributes, its class's attributes, and recursively the
    -    attributes of its class's base classes.
    -
    divmod(...)
    divmod(x, y) -> (div, mod)

    -Return the tuple ((x-x%y)/y, x%y).  Invariant: div*y + mod == x.
    -
    eval(...)
    eval(source[, globals[, locals]]) -> value

    -Evaluate the source in the context of globals and locals.
    -The source may be a string representing a Python expression
    -or a code object as returned by compile().
    -The globals must be a dictionary and locals can be any mapping,
    -defaulting to the current globals and locals.
    -If only globals is given, locals defaults to it.
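For illustration, compile() and eval() together (a minimal sketch):

    code = compile('x * 2 + 1', '<expr>', 'eval')
    print eval(code, {'x': 10})    # -> 21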
    -
    execfile(...)
    execfile(filename[, globals[, locals]])

    -Read and execute a Python script from a file.
    -The globals and locals are dictionaries, defaulting to the current
    -globals and locals.  If only globals is given, locals defaults to it.
    -
    filter(...)
    filter(function or None, sequence) -> list, tuple, or string

    -Return those items of sequence for which function(item) is true.  If
    -function is None, return the items that are true.  If sequence is a tuple
    -or string, return the same type, else return a list.
    -
    getattr(...)
    getattr(object, name[, default]) -> value

    -Get a named attribute from an object; getattr(x, 'y') is equivalent to x.y.
    -When a default argument is given, it is returned when the attribute doesn't
    -exist; without it, an exception is raised in that case.
    -
    globals(...)
    globals() -> dictionary

    -Return the dictionary containing the current scope's global variables.
    -
    hasattr(...)
    hasattr(object, name) -> bool

    -Return whether the object has an attribute with the given name.
    -(This is done by calling getattr(object, name) and catching exceptions.)
    -
    hash(...)
    hash(object) -> integer

    -Return a hash value for the object.  Two objects with the same value have
    -the same hash value.  The reverse is not necessarily true, but likely.
    -
    hex(...)
    hex(number) -> string

    -Return the hexadecimal representation of an integer or long integer.
    -
    id(...)
    id(object) -> integer

    -Return the identity of an object.  This is guaranteed to be unique among
    -simultaneously existing objects.  (Hint: it's the object's memory address.)
    -
    input(...)
    input([prompt]) -> value

    -Equivalent to eval(raw_input(prompt)).
    -
    intern(...)
    intern(string) -> string

    -``Intern'' the given string.  This enters the string in the (global)
    -table of interned strings whose purpose is to speed up dictionary lookups.
    -Return the string itself or the previously interned string object with the
    -same value.
    -
    isinstance(...)
    isinstance(object, class-or-type-or-tuple) -> bool

    -Return whether an object is an instance of a class or of a subclass thereof.
    -With a type as second argument, return whether that is the object's type.
    -The form using a tuple, isinstance(x, (A, B, ...)), is a shortcut for
    -isinstance(x, A) or isinstance(x, B) or ... (etc.).
    -
    issubclass(...)
    issubclass(C, B) -> bool

    -Return whether class C is a subclass (i.e., a derived class) of class B.
    -When using a tuple as the second argument issubclass(X, (A, B, ...)),
    -is a shortcut for issubclass(X, A) or issubclass(X, B) or ... (etc.).
    -
    iter(...)
    iter(collection) -> iterator
    -iter(callable, sentinel) -> iterator

    -Get an iterator from an object.  In the first form, the argument must
    -supply its own iterator, or be a sequence.
    -In the second form, the callable is called until it returns the sentinel.
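For illustration, the two-argument form used for a read-until-sentinel loop (a sketch; the file name is made up):

    f = open('data.txt')
    for line in iter(f.readline, ''):    # call f.readline() until it returns ''
        print line.rstrip()
    f.close()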
    -
    len(...)
    len(object) -> integer

    -Return the number of items of a sequence or mapping.
    -
    locals(...)
    locals() -> dictionary

    -Update and return a dictionary containing the current scope's local variables.
    -
    map(...)
    map(function, sequence[, sequence, ...]) -> list

    -Return a list of the results of applying the function to the items of
    -the argument sequence(s).  If more than one sequence is given, the
    -function is called with an argument list consisting of the corresponding
    -item of each sequence, substituting None for missing values when not all
    -sequences have the same length.  If the function is None, return a list of
    -the items of the sequence (or a list of tuples if more than one sequence).
    -
    max(...)
    max(sequence) -> value
    -max(a, b, c, ...) -> value

    -With a single sequence argument, return its largest item.
    -With two or more arguments, return the largest argument.
    -
    min(...)
    min(sequence) -> value
    -min(a, b, c, ...) -> value

    -With a single sequence argument, return its smallest item.
    -With two or more arguments, return the smallest argument.
    -
    oct(...)
    oct(number) -> string

    -Return the octal representation of an integer or long integer.
    -
    ord(...)
    ord(c) -> integer

    -Return the integer ordinal of a one-character string.
    -
    pow(...)
    pow(x, y[, z]) -> number

    -With two arguments, equivalent to x**y.  With three arguments,
    -equivalent to (x**y) % z, but may be more efficient (e.g. for longs).
    -
    range(...)
    range([start,] stop[, step]) -> list of integers

    -Return a list containing an arithmetic progression of integers.
    -range(i, j) returns [i, i+1, i+2, ..., j-1]; start (!) defaults to 0.
    -When step is given, it specifies the increment (or decrement).
    -For example, range(4) returns [0, 1, 2, 3].  The end point is omitted!
    -These are exactly the valid indices for a list of 4 elements.
    -
    raw_input(...)
    raw_input([prompt]) -> string

    -Read a string from standard input.  The trailing newline is stripped.
    -If the user hits EOF (Unix: Ctl-D, Windows: Ctl-Z+Return), raise EOFError.
    -On Unix, GNU readline is used if enabled.  The prompt string, if given,
    -is printed without a trailing newline before reading.
    -
    reduce(...)
    reduce(function, sequence[, initial]) -> value

    -Apply a function of two arguments cumulatively to the items of a sequence,
    -from left to right, so as to reduce the sequence to a single value.
    -For example, reduce(lambda x, y: x+y, [1, 2, 3, 4, 5]) calculates
    -((((1+2)+3)+4)+5).  If initial is present, it is placed before the items
    -of the sequence in the calculation, and serves as a default when the
    -sequence is empty.
    -
    reload(...)
    reload(module) -> module

    -Reload the module.  The module must have been successfully imported before.
    -
    repr(...)
    repr(object) -> string

    -Return the canonical string representation of the object.
    -For most object types, eval(repr(object)) == object.
    -
    round(...)
    round(number[, ndigits]) -> floating point number

    -Round a number to a given precision in decimal digits (default 0 digits).
    -This always returns a floating point number.  Precision may be negative.
    -
    setattr(...)
    setattr(object, name, value)

    -Set a named attribute on an object; setattr(x, 'y', v) is equivalent to
    -``x.y = v''.
    -
    sorted(...)
    sorted(iterable, cmp=None, key=None, reverse=False) --> new sorted list
    -
    unichr(...)
    unichr(i) -> Unicode character

    -Return a Unicode string of one character with ordinal i; 0 <= i <= 0x10ffff.
    -
    vars(...)
    vars([object]) -> dictionary

    -Without arguments, equivalent to locals().
    -With an argument, equivalent to object.__dict__.
    -
    zip(...)
    zip(seq1 [, seq2 [...]]) -> [(seq1[0], seq2[0] ...), (...)]

    -Return a list of tuples, where each tuple contains the i-th element
    -from each of the argument sequences.  The returned list is truncated
    -in length to the length of the shortest argument sequence.
    -

    - - - - - -
     
    -Data
           Ellipsis = Ellipsis
    -False = False
    -None = None
    -NotImplemented = NotImplemented
    -True = True
    -__debug__ = True
    -copyright = Copyright (c) 2001-2005 Python Software Foundati...ematisch Centrum, Amsterdam. -All Rights Reserved.
    -credits = Thanks to CWI, CNRI, BeOpen.com, Zope Corpor...opment. See www.python.org for more information.
    -exit = 'Use Ctrl-D (i.e. EOF) to exit.'
    -help = Type help() for interactive help, or help(object) for help about object.
    -license = Type license() to see the full license text
    -quit = 'Use Ctrl-D (i.e. EOF) to exit.'
    - \ No newline at end of file diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.baseparser.html simpleparse-2.2.0/doc/pydoc/simpleparse.baseparser.html --- simpleparse-2.1.0a1/doc/pydoc/simpleparse.baseparser.html 2006-02-19 01:05:12.000000000 +0000 +++ simpleparse-2.2.0/doc/pydoc/simpleparse.baseparser.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,320 +0,0 @@ - - -Python: module simpleparse.baseparser - - - - -
     
    - 
    simpleparse.baseparser
    index
    /home/mcfletch/pylive/simpleparse/baseparser.py
    -

    Base class for real-world parsers (such as parser.Parser)

    -

    - - - - - -
     
    -Modules
           
    simpleparse.stt.TextTools.mxTextTools.mxTextTools
    -
    string
    -
    time
    -
    types
    -

    - - - - - -
     
    -Classes
           
    -
    BaseParser -
    -

    - - - - - - - -
     
    -class BaseParser
       Class on which real-world parsers build

    -Normally you use a sub-class of this class, such as
    -simpleparser.parser.Parser
     
     Methods defined here:
    -
    buildProcessor(self)
    Build default processor object for this parser class

    -The default implementation returns None.  The processor
    -can either implement the "method source" API (just provides
    -information about Callouts and the like), or the processor
    -API and the method-source API.  The processor API merely
    -requires that the object be callable, and have the signature:

    -        object( (success, children, nextPosition), buffer)

    -(Note: your object can treat the first item as a single tuple
    -if it likes).

    -See: simpleparse.processor module for details.
    - -
    buildTagger(self, name, processor)
    Build the tag-table for the parser

    -This method must be implemented by your base class and _not_
    -call the implementation here.
    - -
    parse(self, data, production=None, processor=None, start=0, stop=None)
    Parse data with production "production" of this parser

    -data -- data to be parsed, a Python string, for now
    -production -- optional string specifying a non-default production to use
    -        for parsing data
    -processor -- optional pointer to a Processor or MethodSource object for
    -        use in determining reporting format and/or post-processing the results
    -        of the parsing pass.  Can be None if neither is desired (default)
    -start -- starting index for the parsing, default 0
    -stop -- stopping index for the parsing, default len(data)
    - -
    resetBeforeParse(self)
    Called just before the parser's parse method starts working.

    -Allows you to set up special-purpose structures, such as stacks
    -or local storage values.  The base implementation does nothing.
    - -
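For illustration, the usual way this API is exercised through the simpleparse.parser.Parser sub-class (a sketch; the grammar and input are made up):

    from simpleparse.parser import Parser

    declaration = r'''
    word  := [a-zA-Z]+
    ts    := [ \t]*
    line  := (word, ts)+
    '''
    parser = Parser(declaration, 'line')
    success, children, next = parser.parse('hello world')
    print success, next    # 1 and the index up to which the data was parsed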

    - - - - - -
     
    -Functions
           
    BMS = TextSearch(...)
    TextSearch(match[,translate=None,algorithm=default_algorithm])

    -Create a substring search object for the string match;
    -translate is an optional translate-string like the one used
    -in the module re.
    -
    CharSet(...)
    CharSet(definition)

    -Create a character set matching object from the string
    -
    FS = TextSearch(...)
    TextSearch(match[,translate=None,algorithm=default_algorithm])

    -Create a substring search object for the string match;
    -translate is an optional translate-string like the one used
    -in the module re.
    -
    FSType = TextSearch(...)
    TextSearch(match[,translate=None,algorithm=default_algorithm])

    -Create a substring search object for the string match;
    -translate is an optional translate-string like the one used
    -in the module re.
    -
    TagTable(...)
    TagTable(definition[,cachable=1])
    -
    TextSearch(...)
    TextSearch(match[,translate=None,algorithm=default_algorithm])

    -Create a substring search object for the string match;
    -translate is an optional translate-string like the one used
    -in the module re.
    -
    UnicodeTagTable(...)
    TagTable(definition[,cachable=1])
    -
    charsplit(...)
    charsplit(text,char,start=0,stop=len(text))

    -Split text[start:stop] into substrings at char and
    -return the result as list of strings.
    -
    cmp(...)
    cmp(a,b)

    -Compare two valid taglist tuples with respect to their slice
    -position; this is useful for sorting joinlists.
    -
    hex2str(...)
    hex2str(text)

    -Return text interpreted as two byte HEX values converted
    -to a string.
    -
    isascii(...)
    isascii(text,start=0,stop=len(text))

    -Return 1/0 depending on whether text only contains ASCII
    -characters.
    -
    join(...)
    join(joinlist,sep='',start=0,stop=len(joinlist))

    -Copy snippets from different strings together producing a
    -new string
    -The first argument must be a list of tuples or strings;
    -tuples must be of the form (string,l,r[,...]) and turn out
    -as string[l:r]
    -NOTE: the syntax used for negative slices is different
    -than the Python standard: -1 corresponds to the first
    -character *after* the string, e.g. ('Example',0,-1) gives
    -'Example' and not 'Exampl', like in Python
    -sep is an optional separator string, start and stop
    -define the slice of joinlist that is taken into account.
    -
    joinlist(...)
    joinlist(text,list,start=0,stop=len(text))

    -Takes a list of tuples (replacement,l,r,...) and produces
    -a taglist suitable for join() which creates a copy
    -of text where every slice [l:r] is replaced by the
    -given replacement
    -- the list must be sorted using cmp() as compare function
    -- it may not contain overlapping slices
    -- the slices may not contain negative indices
    -- if the taglist cannot contain overlapping slices, you can
    -  give this function the taglist produced by tag() directly
    -  (sorting is not needed, as the list will already be sorted)
    -- start and stop set the slice to work in, i.e. text[start:stop]
    -
    lower(...)
    lower(text)

    -Return text converted to lower case.
    -
    prefix(...)
    prefix(text,prefixes,start=0,stop=len(text)[,translate])

    -Looks at text[start:stop] and returns the first matching
    -prefix out of the tuple of strings given in prefixes.
    -If no prefix is found to be matching, None is returned.
    -The optional 256 char translate string is used to translate
    -the text prior to comparing it with the given prefixes.
    -
    set(...)
    set(string,logic=1)

    -Returns a character set for string: a bit encoded version
    -of the characters occurring in string.
    -- logic can be set to 0 if all characters *not* in string
    -  should go into the set
    -
    setfind(...)
    setfind(text,set,start=0,stop=len(text))

    -Find the first occurrence of any character from set in
    -text[start:stop]
    - set must be a string obtained with set()
    -DEPRECATED: use CharSet().search() instead.
    -
    setsplit(...)
    setsplit(text,set,start=0,stop=len(text))

    -Split text[start:stop] into substrings using set,
    -omitting the splitting parts and empty substrings.
    -set must be a string obtained from set()
    -DEPRECATED: use CharSet().split() instead.
    -
    setsplitx(...)
    setsplitx(text,set,start=0,stop=len(text))

    -Split text[start:stop] into substrings using set, so
    -that every second entry consists only of characters in set.
    -set must be a string obtained with set()
    -DEPRECATED: use CharSet().splitx() instead.
    -
    setstrip(...)
    setstrip(text,set,start=0,stop=len(text),mode=0)

    -Strip all characters in text[start:stop] appearing in set.
    -mode indicates where to strip (<0: left; =0: left and right;
    ->0: right). set must be a string obtained with set()
    -DEPRECATED: use CharSet().strip() instead.
    -
    splitat(...)
    splitat(text,char,nth=1,start=0,stop=len(text))

    -Split text[start:stop] into two substrings at the nth
    -occurrence of char and return the result as a 2-tuple. If the
    -character is not found, the second string is empty. nth may
    -be negative: the search is then done from the right and the
    -first string is empty in case the character is not found.
    -
    str2hex(...)
    str2hex(text)

    -Return text converted to a string consisting of two byte
    -HEX values.
    -
    suffix(...)
    suffix(text,suffixes,start=0,stop=len(text)[,translate])

    -Looks at text[start:stop] and returns the first matching
    -suffix out of the tuple of strings given in suffixes.
    -If no suffix is found to be matching, None is returned.
    -The optional 256 char translate string is used to translate
    -the text prior to comparing it with the given suffixes.
    -
    tag(...)
    tag(text,tagtable,sliceleft=0,sliceright=len(text),taglist=[],context=None) 
    -Produce a tag list for a string, given a tag-table
    -- returns a tuple (success, taglist, nextindex)
    -- if taglist == None, then no taglist is created
    -
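For illustration, a tiny hand-written tag table driven through tag() (a sketch; the tag object name is made up):

    from simpleparse.stt.TextTools.TextTools import tag, AllIn, a2z

    table = (('word', AllIn, a2z),)          # one entry: (tagobj, command, argument)
    print tag('hello world', table)
    # -> (1, [('word', 0, 5, None)], 5)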
    upper(...)
    upper(text)

    -Return text converted to upper case.
    -

    - - - - - -
     
    -Data
           A2Z = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    -A2Z_charset = <Character Set object for 'A-Z'>
    -A2Z_set = '\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -AllIn = 11
    -AllInCharSet = 41
    -AllInSet = 31
    -AllNotIn = 12
    -AppendMatch = 2048
    -AppendTagobj = 1024
    -AppendToTagobj = 512
    -BOYERMOORE = 0
    -Break = 0
    -Call = 201
    -CallArg = 202
    -CallTag = 256
    -EOF = 101
    -FASTSEARCH = 1
    -Fail = 100
    -Here = 1
    -Is = 13
    -IsIn = 14
    -IsInCharSet = 42
    -IsInSet = 32
    -IsNot = 15
    -IsNotIn = 15
    -Jump = 100
    -JumpTarget = 104
    -LookAhead = 4096
    -Loop = 205
    -LoopControl = 206
    -MatchFail = -1000000
    -MatchOk = 1000000
    -Move = 103
    -NoWord = 211
    -Reset = -1
    -Skip = 102
    -SubTable = 207
    -SubTableInList = 208
    -TRIVIAL = 2
    -Table = 203
    -TableInList = 204
    -ThisTable = 999
    -To = 0
    -ToBOF = 0
    -ToEOF = -1
    -Umlaute = '\xc4\xd6\xdc'
    -Umlaute_charset = <Character Set object for '\xc4\xd6\xdc'>
    -Word = 21
    -WordEnd = 23
    -WordStart = 22
    -a2z = 'abcdefghijklmnopqrstuvwxyz'
    -a2z_charset = <Character Set object for 'a-z'>
    -a2z_set = '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -alpha = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
    -alpha_charset = <Character Set object for 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'>
    -alpha_set = '\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -alphanumeric = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'
    -alphanumeric_charset = <Character Set object for 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'>
    -alphanumeric_set = '\x00\x00\x00\x00\x00\x00\xff\x03\xfe\xff\xff\x07\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -any = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./...\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'
    -any_charset = <Character Set object for '\x00-\xff'>
    -any_set = '\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff'
    -formfeed = '\x0c'
    -formfeed_charset = <Character Set object for '\x0c'>
    -german_alpha = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\xe4\xf6\xfc\xdf\xc4\xd6\xdc'
    -german_alpha_charset = <Character Set object for 'ABCDEFGHIJKLMNOPQRSTU...hijklmnopqrstuvwxyz\xe4\xf6\xfc\xdf\xc4\xd6\xdc'>
    -german_alpha_set = '\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00@\x90\x10\x00@\x10'
    -id2cmd = {-1000000: 'MatchFail', -1: 'ToEOF', 0: 'Fail/Jump', 1: 'Here', 11: 'AllIn', 12: 'AllNotIn', 13: 'Is', 14: 'IsIn', 15: 'IsNotIn', 21: 'Word', ...}
    -newline = '\r\n'
    -newline_charset = <Character Set object for '\r\n'>
    -newline_set = '\x00$\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -nonwhitespace_charset = <Character Set object for '^ \t\x0b\r\n\x0c'>
    -nonwhitespace_set = '\xff\xc1\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff'
    -number = '0123456789'
    -number_charset = <Character Set object for '0-9'>
    -number_set = '\x00\x00\x00\x00\x00\x00\xff\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -sFindWord = 213
    -sWordEnd = 212
    -sWordStart = 211
    -tagtable_cache = {(46912536021760, 0): <String Tag Table object>, (46912540134840, 0): <String Tag Table object>, (46912541410080, 0): <String Tag Table object>, (46912541454848, 0): <String Tag Table object>, (46912541455136, 0): <String Tag Table object>, (46912541455208, 0): <String Tag Table object>, (46912541489264, 0): <String Tag Table object>, (46912541566016, 0): <String Tag Table object>, (46912543903688, 0): <String Tag Table object>, (46912543908136, 0): <String Tag Table object>, ...}
    -to_lower = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./...\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'
    -to_upper = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./...\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'
    -umlaute = '\xe4\xf6\xfc\xdf'
    -umlaute_charset = <Character Set object for '\xe4\xf6\xfc\xdf'>
    -white = ' \t\x0b'
    -white_charset = <Character Set object for ' \t\x0b'>
    -white_set = '\x00\x02\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -whitespace = ' \t\x0b\r\n\x0c'
    -whitespace_charset = <Character Set object for ' \t\x0b\r\n\x0c'>
    -whitespace_set = '\x00&\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    - \ No newline at end of file diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.common.calendar_names.html simpleparse-2.2.0/doc/pydoc/simpleparse.common.calendar_names.html --- simpleparse-2.1.0a1/doc/pydoc/simpleparse.common.calendar_names.html 2006-02-19 01:05:14.000000000 +0000 +++ simpleparse-2.2.0/doc/pydoc/simpleparse.common.calendar_names.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,229 +0,0 @@ - - -Python: module simpleparse.common.calendar_names - - - - -
     
    - 
    simpleparse.common.calendar_names
    index
    /home/mcfletch/pylive/simpleparse/common/calendar_names.py
    -

    Locale-specific calendar names (day-of-week and month-of-year)

    -These values are those returned by the calendar module.  Available
    -productions:

    -        locale_day_names
    -        locale_day_names_uc
    -        locale_day_names_lc
    -                Names for the days of the week

    -        locale_day_abbrs
    -        locale_day_abbrs_uc
    -        locale_day_abbrs_lc
    -                Short-forms (3 characters normally) for
    -                the days of the week.

    -        locale_month_names
    -        locale_month_names_uc
    -        locale_month_names_lc
    -                Names for the months of the year

    -        locale_month_abbrs
    -        locale_month_abbrs_uc
    -        locale_month_abbrs_lc
    -                Short-forms (3 characters normally) for
    -                the months of the year

    -Interpreters:
    -        MonthNameInterpreter
    -        DayNameInterpreter
    -                Both offer the ability to set an index other
    -                than the default (of 1) for the first item in
    -                the list.

    -

    - - - - - -
     
    -Modules
           
    calendar
    -
    simpleparse.common
    -
    simpleparse.objectgenerator
    -
    string
    -

    - - - - - -
     
    -Classes
           
    -
    NameInterpreter -
    -
    -
    DayNameInterpreter -
    MonthNameInterpreter -
    -
    -
    -

    - - - - - - - -
     
    -class DayNameInterpreter(NameInterpreter)
       Interpret a day-of-week name as an integer index

    -Pass an "offset" value to __init__ to use an offset other
    -than 1 (Monday = 1), normally 0 (Monday = 0)
     
     Data and other attributes defined here:
    -
    nameType = 'Day'
    - -
    tables = (['monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday'], ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'])
    - -
    -Methods inherited from NameInterpreter:
    -
    __call__(self, (tag, left, right, children), buffer)
    - -
    __init__(self, offset=1)
    - -
    -Data and other attributes inherited from NameInterpreter:
    -
    offset = 1
    - -

    - - - - - - - -
     
    -class MonthNameInterpreter(NameInterpreter)
       Interpret a month-of-year name as an integer index

    -Pass an "offset" value to __init__ to use an offset other
    -than 1 (January = 1), normally 0 (January = 0)
     
     Data and other attributes defined here:
    -
    nameType = 'Month'
    - -
    tables = (['', 'january', 'february', 'march', 'april', 'may', 'june', 'july', 'august', 'september', 'october', 'november', 'december'], ['', 'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec'])
    - -
    -Methods inherited from NameInterpreter:
    -
    __call__(self, (tag, left, right, children), buffer)
    - -
    __init__(self, offset=1)
    - -
    -Data and other attributes inherited from NameInterpreter:
    -
    offset = 1
    - -

    - - - - - -
     
    -class NameInterpreter
        Methods defined here:
    -
    __call__(self, (tag, left, right, children), buffer)
    - -
    __init__(self, offset=1)
    - -
    -Data and other attributes defined here:
    -
    offset = 1
    - -

    - - - - - -
     
    -Functions
           
    _build(name, set)
    -

    - - - - - -
     
    -Data
           c = {'locale_day_abbrs': FirstOfGroup( - children = [ - Literal( -...l( - value = 'Fri', - ), - ], -), 'locale_day_abbrs_lc': FirstOfGroup( - children = [ - Literal( -...l( - value = 'fri', - ), - ], -), 'locale_day_abbrs_uc': FirstOfGroup( - children = [ - Literal( -...l( - value = 'FRI', - ), - ], -), 'locale_day_names': FirstOfGroup( - children = [ - Literal( -... value = 'Friday', - ), - ], -), 'locale_day_names_lc': FirstOfGroup( - children = [ - Literal( -... value = 'friday', - ), - ], -), 'locale_day_names_uc': FirstOfGroup( - children = [ - Literal( -... value = 'FRIDAY', - ), - ], -), 'locale_month_abbrs': FirstOfGroup( - children = [ - Literal( -...eral( - value = '', - ), - ], -), 'locale_month_abbrs_lc': FirstOfGroup( - children = [ - Literal( -...eral( - value = '', - ), - ], -), 'locale_month_abbrs_uc': FirstOfGroup( - children = [ - Literal( -...eral( - value = '', - ), - ], -), 'locale_month_names': FirstOfGroup( - children = [ - Literal( -...eral( - value = '', - ), - ], -), ...}
    -da = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']
    -dn = ['monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday']
    -ma = ['', 'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec']
    -mn = ['', 'january', 'february', 'march', 'april', 'may', 'june', 'july', 'august', 'september', 'october', 'november', 'december']
    - \ No newline at end of file diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.common.chartypes.html simpleparse-2.2.0/doc/pydoc/simpleparse.common.chartypes.html --- simpleparse-2.1.0a1/doc/pydoc/simpleparse.common.chartypes.html 2006-02-19 01:05:14.000000000 +0000 +++ simpleparse-2.2.0/doc/pydoc/simpleparse.common.chartypes.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,94 +0,0 @@ - - -Python: module simpleparse.common.chartypes - - - - -
     
    - 
    simpleparse.common.chartypes
    index
    /home/mcfletch/pylive/simpleparse/common/chartypes.py
    -

    Common locale-specific character types

    -Following productions are all based on string module,
    -with the default locale specified.  The first production
    -is a single character of the class and the second a
    -repeating character version:

    -        digit, digits
    -        uppercasechar, uppercase
    -        lowercasechar, lowercase
    -        letter, letters
    -        whitespacechar, whitespace
    -        punctuationchar, punctuation
    -        octdigit, octdigits
    -        hexdigit, hexdigits
    -        printablechar, printable

    -For Python versions with the constants in the string module:
    -        ascii_letter, ascii_letters
    -        ascii_lowercasechar, ascii_lowercase
    -        ascii_uppercasechar, ascii_uppercase


    -Following are locale-specific values, both are
    -single-character values:

    -        locale_decimal_point -- locale-specific decimal separator
    -        locale_thousands_seperator -- locale-specific "thousands" separator
    -        
    -Others:

    -        EOF -- Matches iff parsing has reached the end of the buffer

    -There are no interpreters provided (the types are considered
    -too common to provide meaningful interpreters).

    -

    - - - - - -
     
    -Modules
           
    simpleparse.stt.TextTools
    -simpleparse.common
    -
    locale
    -simpleparse.objectgenerator
    -
    string
    -

    - - - - - -
     
    -Data
           c = {'EOF': Prebuilt( - value = ((None, 101, 1),), -), 'ascii_letter': Range( - value = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ', -), 'ascii_letters': Range( - repeating = 1, - value = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ', -), 'ascii_lowercase': Range( - repeating = 1, - value = 'abcdefghijklmnopqrstuvwxyz', -), 'ascii_lowercasechar': Range( - value = 'abcdefghijklmnopqrstuvwxyz', -), 'ascii_uppercase': Range( - repeating = 1, - value = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', -), 'ascii_uppercasechar': Range( - value = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', -), 'digit': Range( - value = '0123456789', -), 'digits': Range( - repeating = 1, - value = '0123456789', -), 'hexdigit': Range( - value = '0123456789abcdefABCDEF', -), ...}
    -single = 'printablechar'
    -source = 'printable'
    -value = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~ \t\n\r\x0b\x0c'
    - \ No newline at end of file diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.common.comments.html simpleparse-2.2.0/doc/pydoc/simpleparse.common.comments.html --- simpleparse-2.1.0a1/doc/pydoc/simpleparse.common.comments.html 2006-02-19 01:05:14.000000000 +0000 +++ simpleparse-2.2.0/doc/pydoc/simpleparse.common.comments.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,68 +0,0 @@ - - -Python: module simpleparse.common.comments - - - - -
     
    - 
    simpleparse.common.comments
    index
    /home/mcfletch/pylive/simpleparse/common/comments.py
    -

    Common comment formats

    -To process, handle the "comment" production,
    -(the specific named comment formats are all
    -expanded productions, so you won't get them
    -returned for processing).

    -        hash_comment
    -                # to EOL comments
    -        slashslash_comment
    -                // to EOL comments
    -        semicolon_comment
    -                ; to EOL comments
    -        slashbang_comment
    -        c_comment
    -                non-nesting /* */ comments
    -        slashbang_nest_comment
    -        c_nest_comment
    -                nesting /* /* */ */ comments
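A minimal usage sketch (the "script" and "line" productions and the sample text are invented for illustration; hash_comment is one of the library productions above, and only the shared "comment" production is reported to processors):

    from simpleparse.parser import Parser
    import simpleparse.common.comments       # registers hash_comment and friends

    declaration = r'''
    script := (hash_comment/line)*
    line   := -[#\n]*, [\n]
    '''
    parser = Parser( declaration, 'script' )
    success, children, nextchar = parser.parse( '# a comment\nsome data\n' )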

    -

    - - - - - -
     
    -Modules
           
    simpleparse.common.chartypes
    -
    simpleparse.common
    -
    simpleparse.objectgenerator
    -

    - - - - - -
     
    -Data
           _p = <simpleparse.parser.Parser instance>
    -c = {'c_comment': LibraryElement( - production = 'slashbang_comm...enerator.Generator instance at 0x2aaaad5dedd0>, -), 'c_nest_comment': LibraryElement( - production = 'slashbang_nest...enerator.Generator instance at 0x2aaaad5e7170>, -), 'hash_comment': LibraryElement( - production = 'hash_comment',...enerator.Generator instance at 0x2aaaad5d9128>, -), 'semicolon_comment': LibraryElement( - production = 'semicolon_comm...enerator.Generator instance at 0x2aaaad5d9128>, -), 'slashbang_comment': LibraryElement( - production = 'slashbang_comm...enerator.Generator instance at 0x2aaaad5dedd0>, -), 'slashbang_nest_comment': LibraryElement( - production = 'slashbang_nest...enerator.Generator instance at 0x2aaaad5e7170>, -), 'slashslash_comment': LibraryElement( - production = 'slashslash_com...enerator.Generator instance at 0x2aaaad5d9128>, -)}
    -ccomments = '\n### comments in format /* comment */ with no re..."*/"*\n>slashbang_comment< := \'/*\', comment, \'*/\'\n'
    -eolcomments = "\n### comment formats where the comment goes\n### ..., EOL\n>slashslash_comment< := '//', comment, EOL\n"
    -name = 'slashbang_nest_comment'
    -nccomments = '\n### nestable C comments of form /* comment /* i...comment< := comment_start, comment, comment_stop\n'
    - \ No newline at end of file diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.common.html simpleparse-2.2.0/doc/pydoc/simpleparse.common.html --- simpleparse-2.1.0a1/doc/pydoc/simpleparse.common.html 2006-02-19 01:05:11.000000000 +0000 +++ simpleparse-2.2.0/doc/pydoc/simpleparse.common.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,240 +0,0 @@ - - -Python: package simpleparse.common - - - - -
     
    - 
    simpleparse.common
    index
    /home/mcfletch/pylive/simpleparse/common/__init__.py
    -

    Common (library) definitions

    -You normally use this module by importing one of our
    -sub-modules (which automatically registers itself with
    -the SOURCES list defined here).

    -Calling common.share( dictionary ) with a dictionary
    -mapping string names to element token instances will
    -make the element tokens available under those string
    -names in default parsers.  Note: a Parser can override
    -this by specifying an explicit definitionSources
    -parameter in its initialiser.
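A minimal sketch of the share() call described above (the "semicolon" name and its element token are invented for illustration; Literal comes from simpleparse.objectgenerator):

    from simpleparse import common, objectgenerator

    common.share( {
        'semicolon': objectgenerator.Literal( value = ';' ),   # hypothetical production
    } )
    # any Parser created afterwards (without an explicit definitionSources
    # override) can reference "semicolon" in its grammar by name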

    -

    - - - - - -
     
    -Package Contents
           __init__ -- Common (library) definitions
    calendar_names -- Locale-specific calendar names (day-of-week and month-of-year)
    chartypes -- Common locale-specific character types
    comments -- Common comment formats
    iso_date -- Canonical ISO date format YYYY-MM-DDTHH:mm:SS+HH:mm
    iso_date_loose -- Somewhat Looser ISO date format YYYY-MM-DD HH:mm:SS +HH:mm
    numbers -- Samples showing the parsing of common programming-language constructs
    phonetics -- Phonetic spellings for character values
    strings -- Python string parsers with escape characters
    timezone_names -- Common timezone names (civilian, military and combined)

    - - - - - -
     
    -Functions
           
    share(dictionary)
    -

    - - - - - -
     
    -Data
           SOURCES = [{'locale_day_abbrs': FirstOfGroup( - children = [ - Literal( -...l( - value = 'Fri', - ), - ], -), 'locale_day_abbrs_lc': FirstOfGroup( - children = [ - Literal( -...l( - value = 'fri', - ), - ], -), 'locale_day_abbrs_uc': FirstOfGroup( - children = [ - Literal( -...l( - value = 'FRI', - ), - ], -), 'locale_day_names': FirstOfGroup( - children = [ - Literal( -... value = 'Friday', - ), - ], -), 'locale_day_names_lc': FirstOfGroup( - children = [ - Literal( -... value = 'friday', - ), - ], -), 'locale_day_names_uc': FirstOfGroup( - children = [ - Literal( -... value = 'FRIDAY', - ), - ], -), 'locale_month_abbrs': FirstOfGroup( - children = [ - Literal( -...eral( - value = '', - ), - ], -), 'locale_month_abbrs_lc': FirstOfGroup( - children = [ - Literal( -...eral( - value = '', - ), - ], -), 'locale_month_abbrs_uc': FirstOfGroup( - children = [ - Literal( -...eral( - value = '', - ), - ], -), 'locale_month_names': FirstOfGroup( - children = [ - Literal( -...eral( - value = '', - ), - ], -), ...}, {'EOF': Prebuilt( - value = ((None, 101, 1),), -), 'ascii_letter': Range( - value = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ', -), 'ascii_letters': Range( - repeating = 1, - value = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ', -), 'ascii_lowercase': Range( - repeating = 1, - value = 'abcdefghijklmnopqrstuvwxyz', -), 'ascii_lowercasechar': Range( - value = 'abcdefghijklmnopqrstuvwxyz', -), 'ascii_uppercase': Range( - repeating = 1, - value = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', -), 'ascii_uppercasechar': Range( - value = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', -), 'digit': Range( - value = '0123456789', -), 'digits': Range( - repeating = 1, - value = '0123456789', -), 'hexdigit': Range( - value = '0123456789abcdefABCDEF', -), ...}, {'c_comment': LibraryElement( - production = 'slashbang_comm...enerator.Generator instance at 0x2aaaad5dedd0>, -), 'c_nest_comment': LibraryElement( - production = 'slashbang_nest...enerator.Generator instance at 0x2aaaad5e7170>, -), 'hash_comment': LibraryElement( - production = 'hash_comment',...enerator.Generator instance at 0x2aaaad5d9128>, -), 'semicolon_comment': LibraryElement( - production = 'semicolon_comm...enerator.Generator instance at 0x2aaaad5d9128>, -), 'slashbang_comment': LibraryElement( - production = 'slashbang_comm...enerator.Generator instance at 0x2aaaad5dedd0>, -), 'slashbang_nest_comment': LibraryElement( - production = 'slashbang_nest...enerator.Generator instance at 0x2aaaad5e7170>, -), 'slashslash_comment': LibraryElement( - production = 'slashslash_com...enerator.Generator instance at 0x2aaaad5d9128>, -)}, {'binary_number': LibraryElement( - production = 'binary_number'...enerator.Generator instance at 0x2aaaad5f69e0>, -), 'float': LibraryElement( - production = 'float', - ge...enerator.Generator instance at 0x2aaaad5f69e0>, -), 'float_floatexp': LibraryElement( - production = 'float_floatexp...enerator.Generator instance at 0x2aaaad5f69e0>, -), 'hex': LibraryElement( - production = 'hex', - gene...enerator.Generator instance at 0x2aaaad5f69e0>, -), 'imaginary_number': LibraryElement( - production = 'imaginary_numb...enerator.Generator instance at 0x2aaaad5f69e0>, -), 'int': LibraryElement( - production = 'int', - gene...enerator.Generator instance at 0x2aaaad5f69e0>, -), 'int_unsigned': LibraryElement( - production = 'int_unsigned',...enerator.Generator instance at 0x2aaaad5f69e0>, -), 'number': LibraryElement( - production = 'number', - g...enerator.Generator instance at 0x2aaaad5f69e0>, -), 'number_full': 
LibraryElement( - production = 'number_full', -...enerator.Generator instance at 0x2aaaad5f69e0>, -)}, {'ISO_date': LibraryElement( - production = 'ISO_date', - ...enerator.Generator instance at 0x2aaaad5dca28>, -), 'ISO_date_time': LibraryElement( - production = 'ISO_date_time'...enerator.Generator instance at 0x2aaaad5dca28>, -), 'ISO_time': LibraryElement( - production = 'ISO_time', - ...enerator.Generator instance at 0x2aaaad5dca28>, -)}, {'ISO_date_loose': LibraryElement( - production = 'ISO_date_loose...enerator.Generator instance at 0x2aaaad6009e0>, -), 'ISO_date_time_loose': LibraryElement( - production = 'ISO_date_time_...enerator.Generator instance at 0x2aaaad6009e0>, -), 'ISO_time_loose': LibraryElement( - production = 'ISO_time_loose...enerator.Generator instance at 0x2aaaad6009e0>, -)}, {'military_alphabet_char': FirstOfGroup( - children = [ - Literal( -...( - value = 'Zulu', - ), - ], -), 'military_alphabet_char_lower': FirstOfGroup( - children = [ - Literal( -...( - value = 'zulu', - ), - ], -)}, {'string': LibraryElement( - production = 'string', - g...enerator.Generator instance at 0x2aaaad608d88>, -), 'string_double_quote': LibraryElement( - production = 'str', - gene...enerator.Generator instance at 0x2aaaad82b200>, -), 'string_single_quote': LibraryElement( - production = 'str', - gene...enerator.Generator instance at 0x2aaaad602680>, -), 'string_triple_double': LibraryElement( - production = 'str', - gene...enerator.Generator instance at 0x2aaaad5d95a8>, -), 'string_triple_single': LibraryElement( - production = 'str', - gene...enerator.Generator instance at 0x2aaaad827710>, -)}, {'civilian_timezone_name': FirstOfGroup( - children = [ - Literal( -... - value = 'ACSST', - ), - ], -), 'military_timezone_name': FirstOfGroup( - children = [ - Literal( -...ral( - value = 'A', - ), - ], -), 'timezone_name': FirstOfGroup( - children = [ - Literal( -...ral( - value = 'A', - ), - ], -)}]
    -__path__ = ['/home/mcfletch/pylive/simpleparse/common']
    - \ No newline at end of file diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.common.__init__.html simpleparse-2.2.0/doc/pydoc/simpleparse.common.__init__.html --- simpleparse-2.1.0a1/doc/pydoc/simpleparse.common.__init__.html 2006-02-19 01:05:14.000000000 +0000 +++ simpleparse-2.2.0/doc/pydoc/simpleparse.common.__init__.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,40 +0,0 @@ - - -Python: module simpleparse.common.__init__ - - - - -
     
    - 
    simpleparse.common.__init__
    index
    /home/mcfletch/pylive/simpleparse/common/__init__.py
    -

    Common (library) definitions

    -You normally use this module by importing one of our
    -sub-modules (which automatically registers itself with
    -the SOURCES list defined here).

    -Calling common.share( dictionary ) with a dictionary
    -mapping string names to element token instances will
    -make the element tokens available under those string
    -names in default parsers.  Note: a Parser can override
    -this by specifying an explicit definitionSources
    -parameter in its initialiser.

    -

    - - - - - -
     
    -Functions
           
    share(dictionary)
    -

    - - - - - -
     
    -Data
           SOURCES = []
    - \ No newline at end of file diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.common.iso_date.html simpleparse-2.2.0/doc/pydoc/simpleparse.common.iso_date.html --- simpleparse-2.1.0a1/doc/pydoc/simpleparse.common.iso_date.html 2006-02-19 01:05:14.000000000 +0000 +++ simpleparse-2.2.0/doc/pydoc/simpleparse.common.iso_date.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,148 +0,0 @@ - - -Python: module simpleparse.common.iso_date - - - - -
     
    - 
    simpleparse.common.iso_date
    index
    /home/mcfletch/pylive/simpleparse/common/iso_date.py
    -

    Canonical ISO date format YYYY-MM-DDTHH:mm:SS+HH:mm

    -This parser is _extremely_ strict, and the dates that match it,
    -though really easy to work with for the computer, are not particularly
    -readable.  See the iso_date_loose module for a slightly relaxed
    -definition which allows the "T" character to be replaced by a
    -" " character, and allows a space before the timezone offset, as well
    -as allowing the integer values to use non-0-padded integers.


-        ISO_date -- YYYY-MM-DD format, with month and day optional
    -        ISO_time -- HH:mm:SS format, with minutes and seconds optional
    -        ISO_date_time -- YYYY-MM-DD HH:mm:SS+HH:mm format,
    -                with time optional and TimeZone offset optional

    -Interpreter:
    -        MxInterpreter
    -                Interprets the parse tree as mx.DateTime values
    -                ISO_date and ISO_time
    -                        returns DateTime objects
    -                Time only
    -                        returns RelativeDateTime object which, when
    -                        added to a DateTime gives you the given time
    -                        within that day
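A minimal parsing sketch (the "stamp" production and sample value are invented for illustration; the MxInterpreter described above requires the mx.DateTime package and can be supplied as the processor argument of parse() to obtain DateTime values instead of a raw result tree):

    from simpleparse.parser import Parser
    import simpleparse.common.iso_date       # registers ISO_date, ISO_time, ISO_date_time

    parser = Parser( 'stamp := ISO_date_time', 'stamp' )
    success, children, nextchar = parser.parse( '2006-02-19T01:05:14+00:00' )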

    -

    - - - - - -
     
    -Modules
           
    mx.DateTime
    -simpleparse.common.chartypes
    -
    simpleparse.common
    -simpleparse.common.numbers
    -
    simpleparse.objectgenerator
    -

    - - - - - -
     
    -Classes
           
    -
    DispatchProcessor(Processor) -
    -
    -
    MxInterpreter -
    -
    -
    -

    - - - - - - - -
     
    -class MxInterpreter(DispatchProcessor)
       Interpret a parsed ISO_date_time_loose in GMT/UTC time or localtime
     
     
    Method resolution order:
    -
    MxInterpreter
    -
    DispatchProcessor
    -
    Processor
    -
    MethodSource
    -
    -
    -Methods defined here:
    -
    ISO_date(self, (tag, left, right, sublist), buffer)
    Interpret the ISO date format
    - -
    ISO_date_time(self, (tag, left, right, sublist), buffer)
    Interpret the loose ISO date + time format
    - -
    ISO_time(self, (tag, left, right, sublist), buffer)
    Interpret the ISO time format
    - -
    __init__(self, inputLocal=1, returnLocal=1)
    - -
    offset(self, (tag, left, right, sublist), buffer)
    Calculate the time zone offset as a date-time delta
    - -
    offset_sign(self, (tag, left, right, sublist), buffer)
    Interpret the offset sign as a multiplier
    - -
    -Data and other attributes defined here:
    -
    dateName = 'ISO_date'
    - -
    day = <IntInterpreter object @ 46912541558904>
    Interpret an integer (or unsigned integer) string as an integer
    - -
    hour = <IntInterpreter object @ 46912541558904>
    Interpret an integer (or unsigned integer) string as an integer
    - -
    integer = <IntInterpreter object @ 46912541558904>
    Interpret an integer (or unsigned integer) string as an integer
    - -
    minute = <IntInterpreter object @ 46912541558904>
    Interpret an integer (or unsigned integer) string as an integer
    - -
    month = <IntInterpreter object @ 46912541558904>
    Interpret an integer (or unsigned integer) string as an integer
    - -
    offset_hour = <IntInterpreter object @ 46912541558904>
    Interpret an integer (or unsigned integer) string as an integer
    - -
    offset_minute = <IntInterpreter object @ 46912541558904>
    Interpret an integer (or unsigned integer) string as an integer
    - -
    second = <IntInterpreter object @ 46912541558904>
    Interpret an integer (or unsigned integer) string as an integer
    - -
    timeName = 'ISO_time'
    - -
    year = <IntInterpreter object @ 46912541558904>
    Interpret an integer (or unsigned integer) string as an integer
    - -
    -Methods inherited from DispatchProcessor:
    -
    __call__(self, value, buffer)
    Process the results of the parsing run over buffer

    -Value can either be: (success, tags, next) for a top-level
    -production, or (tag, left, right, children) for a non-top
    -production.
    - -
    -Methods inherited from Processor:
    -
    __repr__(self)
    Return a representation of the class
    - -

    - - - - - -
     
    -Data
           _p = <simpleparse.parser.Parser instance>
    -c = {'ISO_date': LibraryElement( - production = 'ISO_date', - ...enerator.Generator instance at 0x2aaaad5dca28>, -), 'ISO_date_time': LibraryElement( - production = 'ISO_date_time'...enerator.Generator instance at 0x2aaaad5dca28>, -), 'ISO_time': LibraryElement( - production = 'ISO_time', - ...enerator.Generator instance at 0x2aaaad5dca28>, -)}
    -declaration = '\nyear := digit,digit,digit,digit\nmonth ...ate_time := ISO_date, ([T], ISO_time)?, offset?\n'
    -haveMX = 1
    -name = 'ISO_date_time'
    - \ No newline at end of file diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.common.iso_date_loose.html simpleparse-2.2.0/doc/pydoc/simpleparse.common.iso_date_loose.html --- simpleparse-2.1.0a1/doc/pydoc/simpleparse.common.iso_date_loose.html 2006-02-19 01:05:14.000000000 +0000 +++ simpleparse-2.2.0/doc/pydoc/simpleparse.common.iso_date_loose.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,139 +0,0 @@ - - -Python: module simpleparse.common.iso_date_loose - - - - -
     
    - 
    simpleparse.common.iso_date_loose
    index
    /home/mcfletch/pylive/simpleparse/common/iso_date_loose.py
    -

    Somewhat Looser ISO date format YYYY-MM-DD HH:mm:SS +HH:mm

-        ISO_date_loose -- YYYY-MM-DD format, with month and day optional;
-                month or day may be specified without a leading 0
    -        ISO_time_loose -- HH:mm:SS format, with minutes and seconds optional
    -                all numbers may be specified without leading 0
    -        ISO_date_time_loose -- YYYY-MM-DD HH:mm:SS +HH:mm format,
    -                with time optional and TimeZone offset optional,
    -                same format for date and time as above

    -Interpreter:
    -        MxInterpreter
    -                Interprets the parse tree as mx.DateTime values
    -                Date and DateTime -> DateTime objects
    -                Time only -> RelativeDateTime

    -

    - - - - - -
     
    -Modules
           
    mx.DateTime
    -simpleparse.common.chartypes
    -
    simpleparse.common
    -simpleparse.common.numbers
    -
    simpleparse.objectgenerator
    -

    - - - - - -
     
    -Classes
           
    -
    DispatchProcessor(Processor) -
    -
    -
    MxInterpreter -
    -
    -
    -

    - - - - - - - -
     
    -class MxInterpreter(DispatchProcessor)
       Interpret a parsed ISO_date_time_loose in GMT/UTC time or localtime
     
     
    Method resolution order:
    -
    MxInterpreter
    -
    DispatchProcessor
    -
    Processor
    -
    MethodSource
    -
    -
    -Methods defined here:
    -
    ISO_date_loose(self, (tag, left, right, sublist), buffer)
    Interpret the loose ISO date format
    - -
    ISO_date_time_loose(self, (tag, left, right, sublist), buffer)
    Interpret the loose ISO date + time format
    - -
    ISO_time_loose(self, (tag, left, right, sublist), buffer)
    Interpret the loose ISO time format
    - -
    __init__(self, inputLocal=1, returnLocal=1)
    - -
    offset(self, (tag, left, right, sublist), buffer)
    Calculate the time zone offset as a date-time delta
    - -
    offset_sign(self, (tag, left, right, sublist), buffer)
    Interpret the offset sign as a multiplier
    - -
    -Data and other attributes defined here:
    -
    dateName = 'ISO_date_loose'
    - -
    day = <IntInterpreter object @ 46912541567528>
    Interpret an integer (or unsigned integer) string as an integer
    - -
    float = <FloatInterpreter object @ 46912541567744>
    Interpret a standard float value as a float
    - -
    hour = <IntInterpreter object @ 46912541567528>
    Interpret an integer (or unsigned integer) string as an integer
    - -
    int = <IntInterpreter object @ 46912541567528>
    Interpret an integer (or unsigned integer) string as an integer
    - -
    minute = <IntInterpreter object @ 46912541567528>
    Interpret an integer (or unsigned integer) string as an integer
    - -
    month = <IntInterpreter object @ 46912541567528>
    Interpret an integer (or unsigned integer) string as an integer
    - -
    offset_hour = <IntInterpreter object @ 46912541567528>
    Interpret an integer (or unsigned integer) string as an integer
    - -
    offset_minute = <IntInterpreter object @ 46912541567528>
    Interpret an integer (or unsigned integer) string as an integer
    - -
    second = <FloatInterpreter object @ 46912541567744>
    Interpret a standard float value as a float
    - -
    timeName = 'ISO_time_loose'
    - -
    year = <IntInterpreter object @ 46912541567528>
    Interpret an integer (or unsigned integer) string as an integer
    - -
    -Methods inherited from DispatchProcessor:
    -
    __call__(self, value, buffer)
    Process the results of the parsing run over buffer

    -Value can either be: (success, tags, next) for a top-level
    -production, or (tag, left, right, children) for a non-top
    -production.
    - -
    -Methods inherited from Processor:
    -
    __repr__(self)
    Return a representation of the class
    - -

    - - - - - -
     
    -Data
           _p = <simpleparse.parser.Parser instance>
    -c = {'ISO_date_loose': LibraryElement( - production = 'ISO_date_loose...enerator.Generator instance at 0x2aaaad6009e0>, -), 'ISO_date_time_loose': LibraryElement( - production = 'ISO_date_time_...enerator.Generator instance at 0x2aaaad6009e0>, -), 'ISO_time_loose': LibraryElement( - production = 'ISO_time_loose...enerator.Generator instance at 0x2aaaad6009e0>, -)}
    -declaration = "\n<date_separator> := [-]\n<time_separator> := ':'...te_loose, ([T ], ISO_time_loose)?, [ ]?, offset?\n"
    -haveMX = 1
    -name = 'ISO_date_loose'
    - \ No newline at end of file diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.common.numbers.html simpleparse-2.2.0/doc/pydoc/simpleparse.common.numbers.html --- simpleparse-2.1.0a1/doc/pydoc/simpleparse.common.numbers.html 2006-02-19 01:05:14.000000000 +0000 +++ simpleparse-2.2.0/doc/pydoc/simpleparse.common.numbers.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,272 +0,0 @@ - - -Python: module simpleparse.common.numbers - - - - -
     
    - 
    simpleparse.common.numbers
    index
    /home/mcfletch/pylive/simpleparse/common/numbers.py
    -

    Samples showing the parsing of common programming-language constructs

    -numbers
    -        integers
    -                int
    -                int_unsigned
    -                
-        hexadecimal integers
-                hex
-                
-        floats (including exponents, requiring a '.' in the literal)
    -                float
    -                        floats, with optional integer-only exponents
    -                float_floatexp
    -                        floats, with optional integer or float exponents

    -        imaginary_number
    -                (float/int),[jJ]

    -        number
    -                hex/float/int
    -        number_full
    -                binary_number/imaginary_number/hex/float/int

    -        binary_number
    -                signed binary number
    -                        1001001b or 1001001B bit-field format,
    -                        optional sign
    -                        can be used with number as (binary_number/number)

    -Interpreters:

    -        IntInterpreter
    -                int, int_unsigned
    -        HexInterpreter
    -                hex
    -        FloatInterpreter
    -                float
    -        FloatFloatExpInterpreter
    -                float_floatexp
    -        BinaryInterpreter
    -                binary_number
    -        ImaginaryInterpreter
    -                imaginary_number

    -

    - - - - - -
     
    -Modules
           
    simpleparse.common.chartypes
    -simpleparse.common
    -
    simpleparse.objectgenerator
    -string
    -
    sys
    -

    - - - - - -
     
    -Classes
           
    -
    DispatchProcessor(Processor) -
    -
    -
    BinaryInterpreter -
    FloatFloatExpInterpreter -
    FloatInterpreter -
    HexInterpreter -
    ImaginaryInterpreter -
    IntInterpreter -
    -
    -
    -

    - - - - - -
     
    -class BinaryInterpreter(DispatchProcessor)
        
    Method resolution order:
    -
    BinaryInterpreter
    -
    DispatchProcessor
    -
    Processor
    -
    MethodSource
    -
    -
    -Methods defined here:
    -
    __call__(self, (tag, left, right, children), buffer)
    Interpret a bitfield set as an integer
    - -
    -Methods inherited from Processor:
    -
    __repr__(self)
    Return a representation of the class
    - -

    - - - - - - - -
     
    -class FloatFloatExpInterpreter(DispatchProcessor)
Interpret a float string as a float value
    -Note: we're allowing float exponentiation, which
    -gives you a nice way to write 2e.5
     
     
    Method resolution order:
    -
    FloatFloatExpInterpreter
    -
    DispatchProcessor
    -
    Processor
    -
    MethodSource
    -
    -
    -Methods defined here:
    -
    __call__(self, (tag, left, right, children), buffer)
    - -
    -Methods inherited from Processor:
    -
    __repr__(self)
    Return a representation of the class
    - -

    - - - - - - - -
     
    -class FloatInterpreter(DispatchProcessor)
       Interpret a standard float value as a float
     
     
    Method resolution order:
    -
    FloatInterpreter
    -
    DispatchProcessor
    -
    Processor
    -
    MethodSource
    -
    -
    -Methods defined here:
    -
    __call__(self, (tag, left, right, children), buffer)
    - -
    -Methods inherited from Processor:
    -
    __repr__(self)
    Return a representation of the class
    - -

    - - - - - - - -
     
    -class HexInterpreter(DispatchProcessor)
Interpret a hexadecimal integer string as an integer value
     
     
    Method resolution order:
    -
    HexInterpreter
    -
    DispatchProcessor
    -
    Processor
    -
    MethodSource
    -
    -
    -Methods defined here:
    -
    __call__(self, (tag, left, right, children), buffer)
    - -
    -Methods inherited from Processor:
    -
    __repr__(self)
    Return a representation of the class
    - -

    - - - - - -
     
    -class ImaginaryInterpreter(DispatchProcessor)
        
    Method resolution order:
    -
    ImaginaryInterpreter
    -
    DispatchProcessor
    -
    Processor
    -
    MethodSource
    -
    -
    -Methods defined here:
    -
    __call__(self, (tag, left, right, children), buffer)
Interpret an imaginary-number literal ((float/int),[jJ]) as a complex value,
-not sure this algo is correct, will see I suppose
    - -
    -Data and other attributes defined here:
    -
    map = {'float': <FloatInterpreter object @ 46912541411376>, 'int': <IntInterpreter object @ 46912541411088>}
    - -
    -Methods inherited from Processor:
    -
    __repr__(self)
    Return a representation of the class
    - -

    - - - - - - - -
     
    -class IntInterpreter(DispatchProcessor)
       Interpret an integer (or unsigned integer) string as an integer
     
     
    Method resolution order:
    -
    IntInterpreter
    -
    DispatchProcessor
    -
    Processor
    -
    MethodSource
    -
    -
    -Methods defined here:
    -
    __call__(self, (tag, left, right, children), buffer)
    - -
    -Methods inherited from Processor:
    -
    __repr__(self)
    Return a representation of the class
    - -

    - - - - - -
     
    -Functions
           
    _toInt(s, base)
    -
    _toLong(s, base)
    -

    - - - - - -
     
    -Data
           _p = <simpleparse.parser.Parser instance>
    -c = {'binary_number': LibraryElement( - production = 'binary_number'...enerator.Generator instance at 0x2aaaad5f69e0>, -), 'float': LibraryElement( - production = 'float', - ge...enerator.Generator instance at 0x2aaaad5f69e0>, -), 'float_floatexp': LibraryElement( - production = 'float_floatexp...enerator.Generator instance at 0x2aaaad5f69e0>, -), 'hex': LibraryElement( - production = 'hex', - gene...enerator.Generator instance at 0x2aaaad5f69e0>, -), 'imaginary_number': LibraryElement( - production = 'imaginary_numb...enerator.Generator instance at 0x2aaaad5f69e0>, -), 'int': LibraryElement( - production = 'int', - gene...enerator.Generator instance at 0x2aaaad5f69e0>, -), 'int_unsigned': LibraryElement( - production = 'int_unsigned',...enerator.Generator instance at 0x2aaaad5f69e0>, -), 'number': LibraryElement( - production = 'number', - g...enerator.Generator instance at 0x2aaaad5f69e0>, -), 'number_full': LibraryElement( - production = 'number_full', -...enerator.Generator instance at 0x2aaaad5f69e0>, -)}
    -declaration = '\n# sample for parsing integer and float numbers\n... := binary_number/imaginary_number/hex/float/int\n'
    -name = 'number_full'
    - \ No newline at end of file diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.common.phonetics.html simpleparse-2.2.0/doc/pydoc/simpleparse.common.phonetics.html --- simpleparse-2.1.0a1/doc/pydoc/simpleparse.common.phonetics.html 2006-02-19 01:05:14.000000000 +0000 +++ simpleparse-2.2.0/doc/pydoc/simpleparse.common.phonetics.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,83 +0,0 @@ - - -Python: module simpleparse.common.phonetics - - - - -
     
    - 
    simpleparse.common.phonetics
    index
    /home/mcfletch/pylive/simpleparse/common/phonetics.py
    -

    Phonetic spellings for character values

    -At the moment, only contains the "military alphabet"
    -(Alpha, Bravo ... Yankee, Zulu), which is used as
    -alternative timezone names by the military and apparently
    -some aviation groups.  Note, these are fairly common spellings,
    -but they aren't necessarily going to match a particular
    -usage.  I may have missed some of the possibilities...

    -        military_alphabet_char -- fully spelled out versions of
    -                the Alpha, Bravo ... Yankee, Zulu phonetic alphabet,
    -                including a few minor variations in spelling such as
    -                Xray and X-ray.  All characters use title-caps format,
    -                so Zulu, not zulu will match.
    -        military_alphabet_char_lower -- as for above, but with
    -                lowercased versions of the above

    -No interpreters are provided.  Taking the first character of
    -the name will always give you the equivalent character uppercase
    -for the military_alphabet_char and lowercase for the
    -military_alphabet_char_lower.
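A minimal usage sketch (the "spelled" production is invented for illustration; Zulu is one of the spellings matched by military_alphabet_char):

    from simpleparse.parser import Parser
    import simpleparse.common.phonetics      # registers the military_alphabet_char productions

    parser = Parser( 'spelled := military_alphabet_char', 'spelled' )
    success, children, nextchar = parser.parse( 'Zulu' )
    # per the note above, the first character of the matched name ('Z')
    # is the character being spelled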

    -

    - - - - - -
     
    -Modules
           
    simpleparse.common
    -
    simpleparse.objectgenerator
    -
    string
    -

    - - - - - -
     
    -Data
           _letters = ['Alpha', 'Bravo', 'Charlie', 'Delta', 'Echo', 'Echo', 'Foxtrot', 'Golf', 'Gulf', 'Hotel', 'India', 'Juliette', 'Juliet', 'Kilo', 'Lima', 'Mike', 'November', 'Oscar', 'Papa', 'Quebec', ...]
    -c = {'military_alphabet_char': FirstOfGroup( - children = [ - Literal( -...( - value = 'Zulu', - ), - ], -), 'military_alphabet_char_lower': FirstOfGroup( - children = [ - Literal( -...( - value = 'zulu', - ), - ], -)}
    -item = 'Zulu'
    -military_alphabet_char = FirstOfGroup( - children = [ - Literal( -...( - value = 'Zulu', - ), - ], -)
    -military_alphabet_char_lower = FirstOfGroup( - children = [ - Literal( -...( - value = 'zulu', - ), - ], -)
    - \ No newline at end of file diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.common.strings.html simpleparse-2.2.0/doc/pydoc/simpleparse.common.strings.html --- simpleparse-2.1.0a1/doc/pydoc/simpleparse.common.strings.html 2006-02-19 01:05:14.000000000 +0000 +++ simpleparse-2.2.0/doc/pydoc/simpleparse.common.strings.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,175 +0,0 @@ - - -Python: module simpleparse.common.strings - - - - -
     
    - 
    simpleparse.common.strings
    index
    /home/mcfletch/pylive/simpleparse/common/strings.py
    -

    Python string parsers with escape characters

    -Python-string-like operation as much as possible, this includes:
    -        support for single and double-quoted strings
    -        support for triple-quoted versions of the same
    -        support for special character escapes as seen in 8-bit python strings
-        support for octal and hexadecimal character escapes


    -        string_single_quote
    -        string_double_quote
    -        string_triple_single
    -        string_triple_double
    -                Individual string types with the above features

    -        string
    -                Any of the above string types, in a simple FirstOf group
    -                with the triple-quoted types first, then the single quoted
    -                i.e. generated with this grammar:

    -                string_triple_double/string_triple_single/string_double_quote/string_single_quote
    -                

    -Interpreters:
    -        StringInterpreter
    -                Interprets any/all of the above as a normal (non-Raw) Python
    -                regular (non-unicode) string.  Hopefully the action is identical
    -                to doing eval( matchedString, {},{}), without the negative security
    -                implications of that approach.  Note that you need to make the
    -                interpreter available under each name you use directly in your
    -                grammar, so if you use string_single_quote and string_double_quote
    -                directly, then you need to add:
    -                        string_single_quote = myStringInterpreterInstance
    -                        string_double_quote = myStringInterpreterInstance
    -                to your processor class.
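A minimal usage sketch (the "value" production and sample text are invented for illustration; string is the combined production described above, and a StringInterpreter instance registered as the processor's "string" handler would return the de-escaped Python string):

    from simpleparse.parser import Parser
    import simpleparse.common.strings        # registers string and the quote-specific forms

    parser = Parser( 'value := string', 'value' )
    success, children, nextchar = parser.parse( '"hello\\tworld"' )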

    -

    - - - - - -
     
    -Modules
           
    simpleparse.common.chartypes
    -
    simpleparse.common
    -
    simpleparse.objectgenerator
    -
    string
    -

    - - - - - -
     
    -Classes
           
    -
    DispatchProcessor(Processor) -
    -
    -
    StringInterpreter -
    -
    -
    -

    - - - - - - - -
     
    -class StringInterpreter(DispatchProcessor)
       Processor for converting parsed string values to their "intended" value

    -Basically this processor handles de-escaping and stripping the
    -surrounding quotes, so that you get the string as a Python string
    -value.  You use the processor by creating an instance of
    -StringInterpreter() as an item in another processor's
    -methodSource object (often the Parser itself).

    -For example:

    -        class MyProcessor( DispatchProcessor ):
    -                string = StringInterpreter()
    -                
    -                # following would be used if you have, for instance,
    -                # used string_single_quote in an area where double
    -                # or triple-quoted strings are not allowed, but have
    -                # used string in another area.
    -                string_single_quote = string
     
     
    Method resolution order:
    -
    StringInterpreter
    -
    DispatchProcessor
    -
    Processor
    -
    MethodSource
    -
    -
    -Methods defined here:
    -
    backslash_char(self, (tag, left, right, sublist), buffer)
    - -
    char_no_quote(self, (tag, left, right, sublist), buffer)
    - -
    escaped_char(self, (tag, left, right, sublist), buffer)
    - -
    hex_escaped_char(self, (tag, left, right, sublist), buffer)
    - -
    nondelimiter = char_no_quote(self, (tag, left, right, sublist), buffer)
    - -
    octal_escaped_char(self, (tag, left, right, sublist), buffer)
    - -
    string(self, (tag, left, right, sublist), buffer)
    Dispatch any of the string types and return the result
    - -
    string_double_quote = string_single_quote(self, (tag, left, right, sublist), buffer)
    - -
    string_single_quote(self, (tag, left, right, sublist), buffer)
    - -
    string_special_escapes(self, (tag, left, right, sublist), buffer)
    Maps "special" escapes to the corresponding characters
    - -
    string_triple_double = string_single_quote(self, (tag, left, right, sublist), buffer)
    - -
    string_triple_single = string_single_quote(self, (tag, left, right, sublist), buffer)
    - -
    -Data and other attributes defined here:
    -
    specialescapedmap = {'\n': '', '"': '"', "'": "'", r'\': r'\', 'a': '\x07', 'b': '\x08', 'f': '\x0c', 'n': '\n', 'r': '\r', 't': '\t', ...}
    - -
    -Methods inherited from DispatchProcessor:
    -
    __call__(self, value, buffer)
    Process the results of the parsing run over buffer

    -Value can either be: (success, tags, next) for a top-level
    -production, or (tag, left, right, children) for a non-top
    -production.
    - -
    -Methods inherited from Processor:
    -
    __repr__(self)
    Return a representation of the class
    - -

    - - - - - -
     
    -Data
           _p = <simpleparse.parser.Parser instance>
    -_stringTypeData = [('string_double_quote', '\n<delimiter> := \'"\'\nnondelimiter...\\\\"]+\nstring_special_escapes := [\\\\abfnrtv"]\n'), ('string_single_quote', '\n<delimiter> := "\'"\nnondelimiter...\\\\\']+\nstring_special_escapes := [\\\\abfnrtv\']\n'), ('string_triple_single', '\nnondelimiter := -"\'\'\'"\n<delimite...\\\\\']+\nstring_special_escapes := [\\\\abfnrtv\']\n'), ('string_triple_double', '\nnondelimiter := -\'"""\'\n<delimite...\\\\"]+\nstring_special_escapes := [\\\\abfnrtv"]\n')]
    -c = {'string': LibraryElement( - production = 'string', - g...enerator.Generator instance at 0x2aaaad608d88>, -), 'string_double_quote': LibraryElement( - production = 'str', - gene...enerator.Generator instance at 0x2aaaad82b200>, -), 'string_single_quote': LibraryElement( - production = 'str', - gene...enerator.Generator instance at 0x2aaaad602680>, -), 'string_triple_double': LibraryElement( - production = 'str', - gene...enerator.Generator instance at 0x2aaaad5d95a8>, -), 'string_triple_single': LibraryElement( - production = 'str', - gene...enerator.Generator instance at 0x2aaaad827710>, -)}
    -name = 'string_triple_double'
    -partial = '\nnondelimiter := -\'"""\'\n<delimite...\\\\"]+\nstring_special_escapes := [\\\\abfnrtv"]\n'
    -stringDeclaration = '\n# note that non-delimiter can never be hit by n...# i.e. a backslash preceding a non-special char\n\n'
    - \ No newline at end of file diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.common.timezone_names.html simpleparse-2.2.0/doc/pydoc/simpleparse.common.timezone_names.html --- simpleparse-2.1.0a1/doc/pydoc/simpleparse.common.timezone_names.html 2006-02-19 01:05:14.000000000 +0000 +++ simpleparse-2.2.0/doc/pydoc/simpleparse.common.timezone_names.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,157 +0,0 @@ - - -Python: module simpleparse.common.timezone_names - - - - -
     
    - 
    simpleparse.common.timezone_names
    index
    /home/mcfletch/pylive/simpleparse/common/timezone_names.py
    -

    Common timezone names (civilian, military and combined)

    -These productions are a collection of common civilian and
    -military timezone names.  The list of names is by no means
    -exhaustive (nor definitive), but it gives most timezones
    -at least one named value (to make it possible to enter the
    -name), and it doesn't repeat any names (I hope ;) ).  You
    -have three major classes of names, civilian (EST, PST, GMT,
    -UTC), military single-character (A,B,C,D,E...) and military
    -phonetic spelling (Alpha, Bravo... Zulu).  The military
    -variants are combined into a single production, however.

-        civilian_timezone_name -- the "familiar" timezone names; most
-                real-world data entry will probably want to use this as its
-                "timezone" definition.
    -                
    -        military_timezone_name -- military timezones in the two
    -                formats outlined above.
    -                
    -        timezone_name -- combination of the two above into a
    -                single production.

    -Interpreter:

    -        TimeZoneNameInterpreter -- see below for details, by
    -                default takes the timezone name and converts to
-                default takes the timezone name and converts it to an
-                offset in seconds (West-negative format).  Note:
    -                the more commonly used format AFAIK.  Null matches
    -                will return a default TimeZone as specified.

    -

    - - - - - -
     
    -Modules
           
    simpleparse.common
    -
    simpleparse.objectgenerator
    -
    simpleparse.common.phonetics
    -
    time
    -

    - - - - - -
     
    -Classes
           
    -
    TimeZoneNameInterpreter -
    -

    - - - - - - - -
     
    -class TimeZoneNameInterpreter
Interpret a timezone specified as a military or civilian timezone name

    -Return value is an offset from UTC given in seconds.
-If a null-match is passed, the passed defaultZone is used.
    -Returns values in seconds difference from UTC (negative
    -West) divided by the passed "seconds" argument.
     
     Methods defined here:
    -
    __call__(self, (tag, left, right, children), buffer)
    - -
    __init__(self, defaultZone=-14400, seconds=1.0)
defaultZone -- offset in seconds to be returned if there
    -        is no value specified (null-match)
    -seconds -- divisor applied to the value before returning,
    -        if you want hours, use 3600.0, if you want minutes,
    -        use 60.0, if you want days (why?), use 86400.0
    - -

    - - - - - -
     
    -Functions
           
    _build(data)
    Build the name:time map and match rule for each dataset
    -

    - - - - - -
     
    -Data
           LOCAL_ZONE = -14400
    -c = {'civilian_timezone_name': FirstOfGroup( - children = [ - Literal( -... - value = 'ACSST', - ), - ], -), 'military_timezone_name': FirstOfGroup( - children = [ - Literal( -...ral( - value = 'A', - ), - ], -), 'timezone_name': FirstOfGroup( - children = [ - Literal( -...ral( - value = 'A', - ), - ], -)}
    -civilian_data = [('NZDT', 46800), ('IDLE', 43200), ('NZST', 43200), ('NZT', 43200), ('AESST', 39600), ('ACSST', 37800), ('CADT', 37800), ('SADT', 37800), ('AEST', 36000), ('EAST', 36000), ('GST', 36000), ('LIGT', 36000), ('ACST', 34200), ('CAST', 34200), ('SAT', 34200), ('AWSST', 32400), ('JST', 32400), ('KST', 32400), ('WDT', 32400), ('MT', 30600), ...]
    -civilian_mapping = {'ACSST': 37800, 'ACST': 34200, 'ADT': -10800, 'AESST': 39600, 'AEST': 36000, 'AHST': -36000, 'AKST': -32400, 'AST': -14400, 'AT': -7200, 'AWSST': 32400, ...}
    -civilian_rule = FirstOfGroup( - children = [ - Literal( -... - value = 'ACSST', - ), - ], -)
    -item = 'Zulu'
    -key = 'Z'
    -timezone_data = [('NZDT', 46800), ('IDLE', 43200), ('NZST', 43200), ('NZT', 43200), ('AESST', 39600), ('ACSST', 37800), ('CADT', 37800), ('SADT', 37800), ('AEST', 36000), ('EAST', 36000), ('GST', 36000), ('LIGT', 36000), ('ACST', 34200), ('CAST', 34200), ('SAT', 34200), ('AWSST', 32400), ('JST', 32400), ('KST', 32400), ('WDT', 32400), ('MT', 30600), ...]
    -timezone_mapping = {'A': 3600, 'ACSST': 37800, 'ACST': 34200, 'ADT': -10800, 'AESST': 39600, 'AEST': 36000, 'AHST': -36000, 'AKST': -32400, 'AST': -14400, 'AT': -7200, ...}
    -timezone_rule = FirstOfGroup( - children = [ - Literal( -...ral( - value = 'A', - ), - ], -)
    -value = 0
    -zulu_data = [('Alpha', 3600), ('Bravo', 7200), ('Charlie', 10800), ('Delta', 14400), ('Echo', 18000), ('Echo', 18000), ('Foxtrot', 21600), ('Golf', 25200), ('Gulf', 25200), ('Hotel', 28800), ('India', 32400), ('Kilo', 36000), ('Lima', 39600), ('Mike', 43200), ('November', -3600), ('Oscar', -7200), ('Papa', -10800), ('Quebec', -14400), ('Romeo', -18000), ('Sierra', -21600), ...]
    -zulu_mapping = {'A': 3600, 'Alpha': 3600, 'B': 7200, 'Bravo': 7200, 'C': 10800, 'Charlie': 10800, 'D': 14400, 'Delta': 14400, 'E': 18000, 'Echo': 18000, ...}
    -zulu_rule = FirstOfGroup( - children = [ - Literal( -...ral( - value = 'A', - ), - ], -)
    - \ No newline at end of file diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.dispatchprocessor.html simpleparse-2.2.0/doc/pydoc/simpleparse.dispatchprocessor.html --- simpleparse-2.1.0a1/doc/pydoc/simpleparse.dispatchprocessor.html 2006-02-19 01:05:12.000000000 +0000 +++ simpleparse-2.2.0/doc/pydoc/simpleparse.dispatchprocessor.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,88 +0,0 @@ - - -Python: module simpleparse.dispatchprocessor - - - - -
     
    - 
    simpleparse.dispatchprocessor
    index
    /home/mcfletch/pylive/simpleparse/dispatchprocessor.py
    -

    Dispatch-processor API

    -This is a post-processing processor API based on dispatching
    -each element of a result tree in a top-down recursive call
    -structure.  It is the API used by the SimpleParseGrammar Parser,
    -and likely will be the default processor for SimpleParse.

    -

    - - - - - -
     
    -Classes
           
    -
    Processor(MethodSource) -
    -
    -
    DispatchProcessor -
    -
    -
    -

    - - - - - - - -
     
    -class DispatchProcessor(Processor)
       Dispatch results-tree in a top-down recursive pattern with
    -attribute lookup to determine production -> method correspondence.

    -To use the class, subclass it, then define methods for
    -processing each production.  The methods should take this form:
    -        def production_name( self, (tag, left, right, children), buffer):
    -                pass
    -Where children may be either a list, or None, and buffer is the
    -entire buffer being parsed.
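A minimal sketch of such a subclass (Python 2 syntax, matching the method signature shown above; the grammar, the "word" production and the Lister class are invented for illustration):

    from simpleparse.parser import Parser
    from simpleparse.dispatchprocessor import DispatchProcessor, getString

    class Lister( DispatchProcessor ):
        def __init__( self ):
            self.words = []
        def word( self, (tag, left, right, children), buffer ):
            # collect the matched text for each "word" production
            self.words.append( getString( (tag, left, right, children), buffer ) )

    declaration = r'''
    doc  := (word, [ \t\n]*)*
    word := [a-zA-Z]+
    '''
    parser = Parser( declaration, 'doc' )
    processor = Lister()
    parser.parse( 'hello dispatch processor', processor = processor )
    # processor.words == ['hello', 'dispatch', 'processor']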
     
     
    Method resolution order:
    -
    DispatchProcessor
    -
    Processor
    -
    MethodSource
    -
    -
    -Methods defined here:
    -
    __call__(self, value, buffer)
    Process the results of the parsing run over buffer

    -Value can either be: (success, tags, next) for a top-level
    -production, or (tag, left, right, children) for a non-top
    -production.
    - -
    -Methods inherited from Processor:
    -
    __repr__(self)
    Return a representation of the class
    - -

    - - - - - -
     
    -Functions
           
    dispatch(source, tag, buffer)
    Dispatch on source for tag with buffer

    -Find the attribute or key tag[0] of source,
    -then call it with (tag, buffer)
    -
    dispatchList(source, taglist, buffer)
    Dispatch on source for each tag in taglist with buffer
    -
    getString((tag, left, right, sublist), buffer)
    Return the string value of the tag passed
    -
    lines(start=None, end=None, buffer=None)
    Return line number in file at character index (mx.TextTools version)
    -
    multiMap(taglist, source=None, buffer=None)
    Convert a taglist to a mapping from tag-object:[list-of-tags]

    -For instance, if you have items of 3 different types, in any order,
-you can retrieve them all sorted by type with multiMap( childlist)
    -then access them by tagobject key.
    -
    singleMap(taglist, source=None, buffer=None)
Convert a taglist to a mapping from tag-object:tag, overwriting early with late tags
    -
    - \ No newline at end of file diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.error.html simpleparse-2.2.0/doc/pydoc/simpleparse.error.html --- simpleparse-2.1.0a1/doc/pydoc/simpleparse.error.html 2006-02-19 01:05:12.000000000 +0000 +++ simpleparse-2.2.0/doc/pydoc/simpleparse.error.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,107 +0,0 @@ - - -Python: module simpleparse.error - - - - -
     
    - 
    simpleparse.error
    index
    /home/mcfletch/pylive/simpleparse/error.py
    -

    Definition of the ParserSyntaxError raised on parse failure

    -

    - - - - - -
     
    -Modules
           
    string
    -

    - - - - - -
     
    -Classes
           
    -
    SyntaxError(StandardError) -
    -
    -
    ParserSyntaxError -
    -
    -
    -

    - - - - - - - -
     
    -class ParserSyntaxError(SyntaxError)
       Sub-class of SyntaxError for use by SimpleParse parsers

    -Every instance will have the following attributes:
    -        buffer -- pointer to the source buffer
-        position -- integer position in buffer where error occurred or -1
    -        production -- the production which failed
    -        expected -- string (currently taken from grammar) describing
    -                what production/element token failed to match
-The following will be calculated in order to display
    -human-friendly error messages:
    -        line -- ~ text line-number or -1
    -        lineChar -- ~ character on line where parsing failed or -1
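A minimal sketch of how the exception surfaces (the "pair" grammar is invented for illustration; the "!" error-on-fail marker in the grammar is what causes ParserSyntaxError to be raised when the closing parenthesis cannot be matched):

    from simpleparse.parser import Parser
    from simpleparse.error import ParserSyntaxError

    parser = Parser( 'pair := "(", [a-z]+, ")"!', 'pair' )
    try:
        parser.parse( '(abc' )               # no closing parenthesis
    except ParserSyntaxError, err:           # Python 2 except syntax
        print err.messageFormat()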
     
     
    Method resolution order:
    -
    ParserSyntaxError
    -
    SyntaxError
    -
    StandardError
    -
    Exception
    -
    -
    -Methods defined here:
    -
    __str__(self)
    Create a string representation of the error
    - -
    getLineCoordinate(self)
    Get (line number, line character) for the error
    - -
    messageFormat(self, template=None)
    Create a default message for this syntax error
    - -
    -Data and other attributes defined here:
    -
    DEFAULTTEMPLATE = 'Failed parsing production "%(production)s" @pos ...\nExpected syntax: %(expected)s\nGot text: %(text)s'
    - -
    buffer = ''
    - -
    expected = ''
    - -
    line = -1
    - -
    position = -1
    - -
    production = ''
    - -
    -Methods inherited from SyntaxError:
    -
    __init__(...)
    - -
    -Data and other attributes inherited from SyntaxError:
    -
    filename = None
    - -
    lineno = None
    - -
    msg = ''
    - -
    offset = None
    - -
    print_file_and_line = None
    - -
    text = None
    - -
    -Methods inherited from Exception:
    -
    __getitem__(...)
    - -

    - \ No newline at end of file diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.examples.bad_declaration.html simpleparse-2.2.0/doc/pydoc/simpleparse.examples.bad_declaration.html --- simpleparse-2.1.0a1/doc/pydoc/simpleparse.examples.bad_declaration.html 2006-02-19 01:05:14.000000000 +0000 +++ simpleparse-2.2.0/doc/pydoc/simpleparse.examples.bad_declaration.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,36 +0,0 @@ - - -Python: module simpleparse.examples.bad_declaration - - - - -
     
    - 
    simpleparse.examples.bad_declaration
    index
    /home/mcfletch/pylive/simpleparse/examples/bad_declaration.py
    -

    Demonstrates what happens when your declaration is syntactically incorrect

    -When run as a script, will generate a traceback
    -telling you that the grammar defined here is
    -incorrectly formatted.

    -

    - - - - - -
     
    -Modules
           
    simpleparse.common.comments
    -
    simpleparse.common.numbers
    -
    simpleparse.common.strings
    -

    - - - - - -
     
    -Data
           declaration = '# note use of raw string when embedding in pytho...ring/number/identifier\nts := [ \\t]*\n'
    -testdata = '[test1]\n\tval=23\n'
    - \ No newline at end of file diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.examples.findlineset.html simpleparse-2.2.0/doc/pydoc/simpleparse.examples.findlineset.html --- simpleparse-2.1.0a1/doc/pydoc/simpleparse.examples.findlineset.html 2006-02-19 01:05:14.000000000 +0000 +++ simpleparse-2.2.0/doc/pydoc/simpleparse.examples.findlineset.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,24 +0,0 @@ - - -Python: module simpleparse.examples.findlineset - - - - -
     
    - 
    simpleparse.examples.findlineset
    index
    /home/mcfletch/pylive/simpleparse/examples/findlineset.py
    -

    -

    - - - - - -
     
    -Data
           declaration = '\n\nfirstLine := "This is first line"\nsecondLine :... secondLine, -fifthLine*, fifthLine\nsets := set*\n'
    -file1 = 'This is first line\nThis is second line\nThis is N...d line\nThis is fifth line\nThis is NOT fifth line\n'
    -file2 = 'This is first line\nThis is fifth line\nThis is se...e\nThis is NOT second line\nThis is NOT fifth line\n'
    -p = <simpleparse.parser.Parser instance>
    - \ No newline at end of file diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.examples.findliterals.html simpleparse-2.2.0/doc/pydoc/simpleparse.examples.findliterals.html --- simpleparse-2.1.0a1/doc/pydoc/simpleparse.examples.findliterals.html 2006-02-19 01:05:14.000000000 +0000 +++ simpleparse-2.2.0/doc/pydoc/simpleparse.examples.findliterals.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,41 +0,0 @@ - - -Python: module simpleparse.examples.findliterals - - - - -
     
    - 
    simpleparse.examples.findliterals
    index
    /home/mcfletch/pylive/simpleparse/examples/findliterals.py
    -

    -

    - - - - - -
     
    -Modules
           
    os
    -
    string
    -

    - - - - - -
     
    -Functions
           
    bigtest(file, parser=<simpleparse.parser.Parser instance>)
    -
    test()
    -

    - - - - - -
     
    -Data
           declaration = '\nmyfile := (notliteral,literal)+, notliteral\n\n# ...rtv]\nOCTALESCAPEDCHAR := [0-7],[0-7]?,[0-7]?\n'
    -parser = <simpleparse.parser.Parser instance>
    -usage = ' findliterals filename\nFinds all single and doub...hem to stdout.\nIs not triple-quoted string aware.'
    - \ No newline at end of file diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.examples.formatvrml.html simpleparse-2.2.0/doc/pydoc/simpleparse.examples.formatvrml.html --- simpleparse-2.1.0a1/doc/pydoc/simpleparse.examples.formatvrml.html 2006-02-19 01:05:14.000000000 +0000 +++ simpleparse-2.2.0/doc/pydoc/simpleparse.examples.formatvrml.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,80 +0,0 @@ - - -Python: module simpleparse.examples.formatvrml - - - - -
     
    - 
    simpleparse.examples.formatvrml
    index
    /home/mcfletch/pylive/simpleparse/examples/formatvrml.py
    -

    Example using a parser to format VRML97 code as HTML w/CSS

    -

    - - - - - -
     
    -Modules
           
    os
    -
    string
    -
    sys
    -

    - - - - - -
     
    -Classes
           
    -
    VRMLFormatter -
    -
    -
    HTMLVRMLFormatter -
    -
    -
    -

    - - - - - - - -
     
    -class HTMLVRMLFormatter(VRMLFormatter)
       Format VRML files for display in HTML
     
     Data and other attributes defined here:
    -
    NODEMAP = {'DEF': '<%(head)sstrong>', 'DEFName': '<span class="%(nodetype)s">', 'DEFName_tail': '</span>', 'EXTERNPROTO': '<span class="%(nodetype)s">', 'EXTERNPROTO_tail': '</span>', 'PROTO': '<span class="%(nodetype)s">', 'PROTO_tail': '</span>', 'ROUTEData': '<strong class="%(nodetype)s">', 'ROUTEData_tail': '</span>', 'SFString': '<span class="%(nodetype)s">', ...}
    - -
    -Methods inherited from VRMLFormatter:
    -
    __init__(self, infile, vrmlparser=<simpleparse.parser.Parser instance>)
    - -
    format(self, outfile)
    - -

    - - - - - - - -
     
    -class VRMLFormatter
       Base formatting class
     
     Methods defined here:
    -
    __init__(self, infile, vrmlparser=<simpleparse.parser.Parser instance>)
    - -
    format(self, outfile)
    - -

    - - - - - -
     
    -Data
           VRMLPARSERDEF = '\n# Specialised VRML parser for colourising VRML ... := ( [ \\011-\\015,]+ / comment+ )*\n'
    -usage = "formatvrml.py infile outfile\n\tinfile -- properly... your\n\tVRML by changing this file's definitions.\n"
    -vrmlparser = <simpleparse.parser.Parser instance>
    - \ No newline at end of file diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.examples.html simpleparse-2.2.0/doc/pydoc/simpleparse.examples.html --- simpleparse-2.1.0a1/doc/pydoc/simpleparse.examples.html 2006-02-19 01:05:12.000000000 +0000 +++ simpleparse-2.2.0/doc/pydoc/simpleparse.examples.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,33 +0,0 @@ - - -Python: package simpleparse.examples - - - - -
     
    - 
    simpleparse.examples
    index
    /home/mcfletch/pylive/simpleparse/examples/__init__.py
    -

    Examples of use for the SimpleParse parser generator

    -Included are VRML97, EBNF and LISP parsers, as
    -well as a demonstration of using "pre-built"
    -parser nodes (particularly one based on the re
    -module).

    -

    - - - - - -
     
    -Package Contents
           __init__ -- Examples of use for the SimpleParse parser generator
    bad_declaration -- Demonstrates what happens when your declaration is syntactically incorrect
    findlineset
    findliterals
    formatvrml -- Example using a parser to format VRML97 code as HTML w/CSS
    lisp -- Basic LISP parser
    prebuilt_call -- Example using pre-built "re" parsing object
    py_ebnf -- This module defines a parser for the EBNF format used to define Python's grammar
    simpleexample
    simpleexample2_1
    simpleexample2_2 -- Re-written version of simpleexample for 2.0
    simpleexample2_3 -- Re-written version of simpleexample for 2.0
    transformation -- A simple example of parsing
    vrml -- VRML97-compliant Parser
    vrml_erronfail -- VRML97-compliant Parser

    - - - - - -
     
    -Data
           __path__ = ['/home/mcfletch/pylive/simpleparse/examples']
    - \ No newline at end of file diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.examples.__init__.html simpleparse-2.2.0/doc/pydoc/simpleparse.examples.__init__.html --- simpleparse-2.1.0a1/doc/pydoc/simpleparse.examples.__init__.html 2006-02-19 01:05:14.000000000 +0000 +++ simpleparse-2.2.0/doc/pydoc/simpleparse.examples.__init__.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,19 +0,0 @@ - - -Python: module simpleparse.examples.__init__ - - - - -
     
    - 
    simpleparse.examples.__init__
    index
    /home/mcfletch/pylive/simpleparse/examples/__init__.py
    -

    Examples of use for the SimpleParse parser generator

    -Included are VRML97, EBNF and LISP parsers, as
    -well as a demonstration of using "pre-built"
    -parser nodes (particularly one based on the re
    -module).

    - - \ No newline at end of file diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.examples.lisp.html simpleparse-2.2.0/doc/pydoc/simpleparse.examples.lisp.html --- simpleparse-2.1.0a1/doc/pydoc/simpleparse.examples.lisp.html 2006-02-19 01:05:14.000000000 +0000 +++ simpleparse-2.2.0/doc/pydoc/simpleparse.examples.lisp.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,40 +0,0 @@ - - -Python: module simpleparse.examples.lisp - - - - -
     
    - 
    simpleparse.examples.lisp
    index
    /home/mcfletch/pylive/simpleparse/examples/lisp.py
    -

    Basic LISP parser

    -We use library items, so we get " strings, float, int, and hex
    -atoms, as well as lists.  Note: Lisp doesn't seem to
    -use "," for seperating atoms in lists?  I don't really
    -remember it well enough to recall, but seems to match the
    -samples I see.

    -Note: Original grammar was from a sample in the YAPPS
    -documentation.  Though it's kinda hard to recognise here.
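A minimal usage sketch (the sample expression is illustrative; the parser is the
module-level object listed under Data below):

    # Parse a small s-expression with the example's parser; with no processor
    # the result should follow the (success, resultTree, nextCharacter) form.
    from simpleparse.examples import lisp

    source = '(1 2 (3.5 "hello"))'
    print(lisp.parser.parse(source))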

    -

    - - - - - -
     
    -Modules
           
    simpleparse.common.numbers
    -
    simpleparse.common.strings
    -

    - - - - - -
     
    -Data
           definition = '\n### A simple LISP parser\n\n<ts> := [ \\t\\n...q?, ")"!\n>seq< := ts, atom, (ts,atom)*, ts\n'
    -parser = <simpleparse.parser.Parser instance>
    - \ No newline at end of file diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.examples.prebuilt_call.html simpleparse-2.2.0/doc/pydoc/simpleparse.examples.prebuilt_call.html --- simpleparse-2.1.0a1/doc/pydoc/simpleparse.examples.prebuilt_call.html 2006-02-19 01:05:14.000000000 +0000 +++ simpleparse-2.2.0/doc/pydoc/simpleparse.examples.prebuilt_call.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,337 +0,0 @@ - - -Python: module simpleparse.examples.prebuilt_call - - - - -
     
    - 
    simpleparse.examples.prebuilt_call
    index
    /home/mcfletch/pylive/simpleparse/examples/prebuilt_call.py
    -

    Example using pre-built "re" parsing object

    -The Pre-built Element Token lets you include elements
    -which cannot be readily defined in the SimpleParse EBNF
    -including items defined by a callout to a Python
    -function.  This example demonstrates the technique.

    -The example also (obviously) demonstrates the use of an
    -re object during the parsing process.
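A minimal sketch of such a callout (this is not the module's REMatch class; the
class name is hypothetical, but the call signature follows the one documented
for REMatch below):

    import re

    class RegexCallout:
        """Wrap a compiled regex as a callable matcher: return a position
        greater than the given one on success, or the original position
        to signal failure."""
        def __init__(self, expression, flags=0):
            self.pattern = re.compile(expression, flags)
        def __call__(self, text, position, endPosition):
            match = self.pattern.match(text, position, endPosition)
            if match and match.end() > position:
                return match.end()   # matched: report the new position
            return position          # no match: position unchanged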

    -

    - - - - - -
     
    -Modules
           
    simpleparse.dispatchprocessor
    -simpleparse.stt.TextTools.mxTextTools.mxTextTools
    -
    re
    -string
    -
    time
    -types
    -

    - - - - - -
     
    -Classes
           
    -
    DispatchProcessor(Processor) -
    -
    -
    WordProcessor -
    -
    -
    REMatch -
    -

    - - - - - - - -
     
    -class REMatch
       An object wrapping a regular expression with __call__ (and Call) semantics
     
     Methods defined here:
    -
    __call__(self, text, position, endPosition)
Return the new text position; if it is greater than position, the match succeeded, otherwise it failed
    - -
    __init__(self, expression, flags=0)
    - -
    table(self)
    Build the TextTools table for the object
    - -

    - - - - - - - -
     
    -class WordProcessor(DispatchProcessor)
       Processor sub-class defining processing functions for the productions
     
     
    Method resolution order:
    -
    WordProcessor
    -
    DispatchProcessor
    -
    Processor
    -
    MethodSource
    -
    -
    -Methods defined here:
    -
    white(self, tup, buffer)
    Deal with a "white" production by printing out value
    - -
    word(self, tup, buffer)
    Deal with a "word" production by printing out value
    - -
    -Methods inherited from DispatchProcessor:
    -
    __call__(self, value, buffer)
    Process the results of the parsing run over buffer

    -Value can either be: (success, tags, next) for a top-level
    -production, or (tag, left, right, children) for a non-top
    -production.
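A minimal sketch of the dispatch convention described above (the class name and
productions are illustrative): each method is named after a production and
receives the (tag, left, right, children) tuple plus the buffer being parsed.

    from simpleparse.dispatchprocessor import DispatchProcessor

    class EchoWords(DispatchProcessor):
        def word(self, info, buffer):
            (tag, left, right, children) = info
            print("word: %r" % buffer[left:right])
        def white(self, info, buffer):
            (tag, left, right, children) = info
            print("white: %r" % buffer[left:right])

    # typical use: parser.parse(someText, processor=EchoWords())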
    - -
    -Methods inherited from Processor:
    -
    __repr__(self)
    Return a representation of the class
    - -

    - - - - - -
     
    -Functions
           
    BMS = TextSearch(...)
    TextSearch(match[,translate=None,algorithm=default_algorithm])

    -Create a substring search object for the string match;
    -translate is an optional translate-string like the one used
    -in the module re.
    -
    CharSet(...)
    CharSet(definition)

    -Create a character set matching object from the string
    -
    FS = TextSearch(...)
    TextSearch(match[,translate=None,algorithm=default_algorithm])

    -Create a substring search object for the string match;
    -translate is an optional translate-string like the one used
    -in the module re.
    -
    FSType = TextSearch(...)
    TextSearch(match[,translate=None,algorithm=default_algorithm])

    -Create a substring search object for the string match;
    -translate is an optional translate-string like the one used
    -in the module re.
    -
    TagTable(...)
    TagTable(definition[,cachable=1])
    -
    TextSearch(...)
    TextSearch(match[,translate=None,algorithm=default_algorithm])

    -Create a substring search object for the string match;
    -translate is an optional translate-string like the one used
    -in the module re.
    -
    UnicodeTagTable(...)
    TagTable(definition[,cachable=1])
    -
    charsplit(...)
    charsplit(text,char,start=0,stop=len(text))

    -Split text[start:stop] into substrings at char and
    -return the result as list of strings.
    -
    cmp(...)
    cmp(a,b)

    -Compare two valid taglist tuples w/r to their slice
    -position; this is useful for sorting joinlists.
    -
    hex2str(...)
    hex2str(text)

    -Return text interpreted as two byte HEX values converted
    -to a string.
    -
    isascii(...)
    isascii(text,start=0,stop=len(text))

    -Return 1/0 depending on whether text only contains ASCII
    -characters.
    -
    join(...)
    join(joinlist,sep='',start=0,stop=len(joinlist))

    -Copy snippets from different strings together producing a
    -new string
    -The first argument must be a list of tuples or strings;
    -tuples must be of the form (string,l,r[,...]) and turn out
    -as string[l:r]
    -NOTE: the syntax used for negative slices is different
    -than the Python standard: -1 corresponds to the first
    -character *after* the string, e.g. ('Example',0,-1) gives
    -'Example' and not 'Exampl', like in Python
    -sep is an optional separator string, start and stop
-define the slice of joinlist that is taken into account.
    -
    joinlist(...)
    joinlist(text,list,start=0,stop=len(text))

    -Takes a list of tuples (replacement,l,r,...) and produces
    -a taglist suitable for join() which creates a copy
    -of text where every slice [l:r] is replaced by the
    -given replacement
    -- the list must be sorted using cmp() as compare function
    -- it may not contain overlapping slices
    -- the slices may not contain negative indices
    -- if the taglist cannot contain overlapping slices, you can
    -  give this function the taglist produced by tag() directly
    -  (sorting is not needed, as the list will already be sorted)
    -- start and stop set the slice to work in, i.e. text[start:stop]
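A small usage sketch of the joinlist()/join() pair described above (import path
assumed to be the bundled TextTools module):

    from simpleparse.stt.TextTools.TextTools import join, joinlist

    text = "the quick brown fox"
    # replace text[4:9] ("quick") and text[10:15] ("brown")
    pieces = joinlist(text, [("slow", 4, 9), ("green", 10, 15)])
    print(join(pieces))   # should print: the slow green fox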
    -
    lower(...)
    lower(text)

    -Return text converted to lower case.
    -
    prefix(...)
    prefix(text,prefixes,start=0,stop=len(text)[,translate])

    -Looks at text[start:stop] and returns the first matching
    -prefix out of the tuple of strings given in prefixes.
    -If no prefix is found to be matching, None is returned.
    -The optional 256 char translate string is used to translate
-the text prior to comparing it with the given prefixes.
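A small usage sketch for prefix(), per the description above:

    from simpleparse.stt.TextTools.TextTools import prefix

    print(prefix("import os", ("from", "import")))   # -> 'import'
    print(prefix("x = 1", ("from", "import")))       # -> None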
    -
    set(...)
    set(string,logic=1)

    -Returns a character set for string: a bit encoded version
    -of the characters occurring in string.
    -- logic can be set to 0 if all characters *not* in string
    -  should go into the set
    -
    setfind(...)
    setfind(text,set,start=0,stop=len(text))

-Find the first occurrence of any character from set in
    -text[start:stop]
    - set must be a string obtained with set()
    -DEPRECATED: use CharSet().search() instead.
    -
    setsplit(...)
    setsplit(text,set,start=0,stop=len(text))

    -Split text[start:stop] into substrings using set,
    -omitting the splitting parts and empty substrings.
    -set must be a string obtained from set()
    -DEPRECATED: use CharSet().split() instead.
    -
    setsplitx(...)
    setsplitx(text,set,start=0,stop=len(text))

    -Split text[start:stop] into substrings using set, so
    -that every second entry consists only of characters in set.
    -set must be a string obtained with set()
    -DEPRECATED: use CharSet().splitx() instead.
    -
    setstrip(...)
    setstrip(text,set,start=0,stop=len(text),mode=0)

    -Strip all characters in text[start:stop] appearing in set.
    -mode indicates where to strip (<0: left; =0: left and right;
    ->0: right). set must be a string obtained with set()
    -DEPRECATED: use CharSet().strip() instead.
    -
    splitat(...)
    splitat(text,char,nth=1,start=0,stop=len(text))

    -Split text[start:stop] into two substrings at the nth
-occurrence of char and return the result as 2-tuple. If the
    -character is not found, the second string is empty. nth may
    -be negative: the search is then done from the right and the
    -first string is empty in case the character is not found.
    -
    str2hex(...)
    str2hex(text)

    -Return text converted to a string consisting of two byte
    -HEX values.
    -
    suffix(...)
    suffix(text,suffixes,start=0,stop=len(text)[,translate])

    -Looks at text[start:stop] and returns the first matching
    -suffix out of the tuple of strings given in suffixes.
    -If no suffix is found to be matching, None is returned.
    -The optional 256 char translate string is used to translate
    -the text prior to comparing it with the given suffixes.
    -
    tag(...)
    tag(text,tagtable,sliceleft=0,sliceright=len(text),taglist=[],context=None) 
    -Produce a tag list for a string, given a tag-table
    -- returns a tuple (success, taglist, nextindex)
    -- if taglist == None, then no taglist is created
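A small sketch of a direct tag() call, using the AllIn command and the alpha
character string listed under Data below (expected values are noted as
comments, not guaranteed):

    from simpleparse.stt.TextTools.TextTools import tag, AllIn, alpha

    text = "hello world"
    table = (("word", AllIn, alpha),)   # tag a run of letters as "word"
    success, taglist, nextindex = tag(text, table)
    # expected: success == 1, taglist like [('word', 0, 5, None)], nextindex == 5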
    -
    upper(...)
    upper(text)

    -Return text converted to upper case.
    -

    - - - - - -
     
    -Data
           A2Z = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    -A2Z_charset = <Character Set object for 'A-Z'>
    -A2Z_set = '\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -AllIn = 11
    -AllInCharSet = 41
    -AllInSet = 31
    -AllNotIn = 12
    -AppendMatch = 2048
    -AppendTagobj = 1024
    -AppendToTagobj = 512
    -BOYERMOORE = 0
    -Break = 0
    -Call = 201
    -CallArg = 202
    -CallTag = 256
    -EOF = 101
    -FASTSEARCH = 1
    -Fail = 100
    -Here = 1
    -Is = 13
    -IsIn = 14
    -IsInCharSet = 42
    -IsInSet = 32
    -IsNot = 15
    -IsNotIn = 15
    -Jump = 100
    -JumpTarget = 104
    -LookAhead = 4096
    -Loop = 205
    -LoopControl = 206
    -MatchFail = -1000000
    -MatchOk = 1000000
    -Move = 103
    -NoWord = 211
    -Reset = -1
    -Skip = 102
    -SubTable = 207
    -SubTableInList = 208
    -TRIVIAL = 2
    -Table = 203
    -TableInList = 204
    -ThisTable = 999
    -To = 0
    -ToBOF = 0
    -ToEOF = -1
    -Umlaute = '\xc4\xd6\xdc'
    -Umlaute_charset = <Character Set object for '\xc4\xd6\xdc'>
    -Word = 21
    -WordEnd = 23
    -WordStart = 22
    -a2z = 'abcdefghijklmnopqrstuvwxyz'
    -a2z_charset = <Character Set object for 'a-z'>
    -a2z_set = '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -alpha = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
    -alpha_charset = <Character Set object for 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'>
    -alpha_set = '\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -alphanumeric = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'
    -alphanumeric_charset = <Character Set object for 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'>
    -alphanumeric_set = '\x00\x00\x00\x00\x00\x00\xff\x03\xfe\xff\xff\x07\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -any = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./...\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'
    -any_charset = <Character Set object for '\x00-\xff'>
    -any_set = '\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff'
    -declaration = '\nv := white?,(word,white?)+\n'
    -formfeed = '\x0c'
    -formfeed_charset = <Character Set object for '\x0c'>
    -german_alpha = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\xe4\xf6\xfc\xdf\xc4\xd6\xdc'
    -german_alpha_charset = <Character Set object for 'ABCDEFGHIJKLMNOPQRSTU...hijklmnopqrstuvwxyz\xe4\xf6\xfc\xdf\xc4\xd6\xdc'>
    -german_alpha_set = '\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00@\x90\x10\x00@\x10'
    -id2cmd = {-1000000: 'MatchFail', -1: 'ToEOF', 0: 'Fail/Jump', 1: 'Here', 11: 'AllIn', 12: 'AllNotIn', 13: 'Is', 14: 'IsIn', 15: 'IsNotIn', 21: 'Word', ...}
    -newline = '\r\n'
    -newline_charset = <Character Set object for '\r\n'>
    -newline_set = '\x00$\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -nonwhitespace_charset = <Character Set object for '^ \t\x0b\r\n\x0c'>
    -nonwhitespace_set = '\xff\xc1\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff'
    -number = '0123456789'
    -number_charset = <Character Set object for '0-9'>
    -number_set = '\x00\x00\x00\x00\x00\x00\xff\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -parser = <simpleparse.parser.Parser instance>
    -sFindWord = 213
    -sWordEnd = 212
    -sWordStart = 211
    -tagtable_cache = {(46912536021760, 0): <String Tag Table object>, (46912540134840, 0): <String Tag Table object>, (46912541410080, 0): <String Tag Table object>, (46912541454848, 0): <String Tag Table object>, (46912541455136, 0): <String Tag Table object>, (46912541455208, 0): <String Tag Table object>, (46912541489264, 0): <String Tag Table object>, (46912541566016, 0): <String Tag Table object>, (46912543903688, 0): <String Tag Table object>, (46912543908136, 0): <String Tag Table object>, ...}
    -to_lower = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./...\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'
    -to_upper = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./...\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'
    -umlaute = '\xe4\xf6\xfc\xdf'
    -umlaute_charset = <Character Set object for '\xe4\xf6\xfc\xdf'>
    -white = ' \t\x0b'
    -white_charset = <Character Set object for ' \t\x0b'>
    -white_set = '\x00\x02\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -whitespace = ' \t\x0b\r\n\x0c'
    -whitespace_charset = <Character Set object for ' \t\x0b\r\n\x0c'>
    -whitespace_set = '\x00&\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    - \ No newline at end of file diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.examples.py_ebnf.html simpleparse-2.2.0/doc/pydoc/simpleparse.examples.py_ebnf.html --- simpleparse-2.1.0a1/doc/pydoc/simpleparse.examples.py_ebnf.html 2006-02-19 01:05:14.000000000 +0000 +++ simpleparse-2.2.0/doc/pydoc/simpleparse.examples.py_ebnf.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,76 +0,0 @@ - - -Python: module simpleparse.examples.py_ebnf - - - - -
     
    - 
    simpleparse.examples.py_ebnf
    index
    /home/mcfletch/pylive/simpleparse/examples/py_ebnf.py
    -

    This module defines a parser for the EBNF format used to define Python's grammar

    -The grammar in the language reference (as seen in Python 2.2.1)
    -seems to be somewhat messed up.  I've tried to fix the glaring
    -errors (such as control codes included in the text version) and
    -have saved this in the examples directory.

    -This parser does parse the entire (fixed) grammar, with the
    -exception of the <XXX> style comments which were used in a few
    -places in the grammar to say "range of characters"

    -What this doesn't do, is try to _use_ the parsed grammar.  The
    -grammar is assuming a very different parser type than SimpleParse,
    -for instance, it assumes that alternation (|) will use longest-of
    -semantics, so that:

    -        int      := blah
    -        long_int := int, [lL]
    -        all_ints := int, long_int

    -Would match long_int (because it's longest), rather than int, which
    -is what the base SimpleParse FOGroup would do.  You could fairly
    -trivially make a processor similar to the simpleparsegrammar one
    -to make objectgenerator objects from the parsed format, but the
    -resulting parser just wouldn't work because of the differences in
    -parser capability.
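A hedged illustration of that point (the grammar is invented, not the module's):
with SimpleParse's first-of semantics the order of alternatives decides the
match, so the longer alternative has to be listed first to get the behaviour
the Python grammar assumes.

    from simpleparse.parser import Parser

    decl = (
        'number   := [0-9]+\n'
        'long_int := number, [lL]\n'
        'all_ints := long_int / number\n'
    )
    p = Parser(decl, "all_ints")
    print(p.parse("123L"))   # long_int is tried (and matched) first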

    -Basically, we'll want to have a new back-end before continuing on
    -with this demo.

    -The grammar being parsed (and included) is part of Python, so
    -here's the copyright notice:

    -        Python is Copyright (c) 2001, 2002 Python Software Foundation.
    -        All Rights Reserved.

    -        Copyright (c) 2000 BeOpen.com.
    -        All Rights Reserved.

    -        Copyright (c) 1995-2001 Corporation for National Research Initiatives.
    -        All Rights Reserved.

    -        Copyright (c) 1991-1995 Stichting Mathematisch Centrum, Amsterdam.
    -        All Rights Reserved.
    -        
    -You should have a full copy of the Python license in your Python
    -distribution.

    -

    - - - - - -
     
    -Modules
           
    simpleparse.common.strings
    -

    - - - - - -
     
    -Data
           declaration = "\n\ndeclarationset := declaration+\ndeclarati... := string, ts, '...', ts, string\n\n"
    -parser = <simpleparse.parser.Parser instance>
    - \ No newline at end of file diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.examples.simpleexample2_1.html simpleparse-2.2.0/doc/pydoc/simpleparse.examples.simpleexample2_1.html --- simpleparse-2.1.0a1/doc/pydoc/simpleparse.examples.simpleexample2_1.html 2006-02-19 01:05:14.000000000 +0000 +++ simpleparse-2.2.0/doc/pydoc/simpleparse.examples.simpleexample2_1.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,31 +0,0 @@ - - -Python: module simpleparse.examples.simpleexample2_1 - - - - -
     
    - 
    simpleparse.examples.simpleexample2_1
    index
    /home/mcfletch/pylive/simpleparse/examples/simpleexample2_1.py
    -

    -

    - - - - - -
     
    -Modules
           
    simpleparse.common.comments
    -
    simpleparse.common.numbers
    -
    simpleparse.common.strings
    -

    - - - - - -
     
    -Data
           declaration = '# note use of raw string when embedding in pytho...ring/number/identifier\nts := [ \\t]*\n'
    - \ No newline at end of file diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.examples.simpleexample2_2.html simpleparse-2.2.0/doc/pydoc/simpleparse.examples.simpleexample2_2.html --- simpleparse-2.1.0a1/doc/pydoc/simpleparse.examples.simpleexample2_2.html 2006-02-19 01:05:14.000000000 +0000 +++ simpleparse-2.2.0/doc/pydoc/simpleparse.examples.simpleexample2_2.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,38 +0,0 @@ - - -Python: module simpleparse.examples.simpleexample2_2 - - - - -
     
    - 
    simpleparse.examples.simpleexample2_2
    index
    /home/mcfletch/pylive/simpleparse/examples/simpleexample2_2.py
    -

    Re-written version of simpleexample for 2.0

    -Shows use of Parser to check syntax of declaration and
    -test that a particular production is matching what we
    -expect it to match...
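A sketch of the kind of check this example performs (helper name and reporting
are illustrative):

    def check_production(parser, production, samples):
        """Parse each sample with one named production and confirm the
        whole sample was consumed."""
        for sample in samples:
            success, children, nextchar = parser.parse(
                sample, production=production)
            if success and nextchar == len(sample):
                print("OK:   %r" % sample)
            else:
                print("FAIL: %r did not fully match %r" % (sample, production))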

    -

    - - - - - -
     
    -Modules
           
    simpleparse.common.comments
    -
    simpleparse.common.numbers
    -
    simpleparse.common.strings
    -

    - - - - - -
     
    -Data
           declaration = '# note use of raw string when embedding in pytho...ring/number/identifier\nts := [ \\t]*\n'
    -parser = <simpleparse.parser.Parser instance>
    -production = 'equality'
    -testEquality = ['s=3\n', 's = 3\n', ' s="three\\nthere"\n', ' s=three\n']
    - \ No newline at end of file diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.examples.simpleexample2_3.html simpleparse-2.2.0/doc/pydoc/simpleparse.examples.simpleexample2_3.html --- simpleparse-2.1.0a1/doc/pydoc/simpleparse.examples.simpleexample2_3.html 2006-02-19 01:05:14.000000000 +0000 +++ simpleparse-2.2.0/doc/pydoc/simpleparse.examples.simpleexample2_3.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,34 +0,0 @@ - - -Python: module simpleparse.examples.simpleexample2_3 - - - - -
     
    - 
    simpleparse.examples.simpleexample2_3
    index
    /home/mcfletch/pylive/simpleparse/examples/simpleexample2_3.py
    -

    Re-written version of simpleexample for 2.0

    -

    - - - - - -
     
    -Modules
           
    simpleparse.common.comments
    -
    simpleparse.common.numbers
    -
    pprint
    -
    simpleparse.common.strings
    -

    - - - - - -
     
    -Data
           declaration = '# note use of raw string when embedding in pytho...ring/number/identifier\nts := [ \\t]*\n'
    -parser = <simpleparse.parser.Parser instance>
    -testData = '[test1]\n\tval=23\n\tval2="23"\n\tval3 = "23\t\nskidoo\xee"...\t; why not\n\tlog = heavy_wood\n\n[test2]\nloose=lips\n'
    - \ No newline at end of file diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.examples.simpleexample.html simpleparse-2.2.0/doc/pydoc/simpleparse.examples.simpleexample.html --- simpleparse-2.1.0a1/doc/pydoc/simpleparse.examples.simpleexample.html 2006-02-19 01:05:14.000000000 +0000 +++ simpleparse-2.2.0/doc/pydoc/simpleparse.examples.simpleexample.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,31 +0,0 @@ - - -Python: module simpleparse.examples.simpleexample - - - - -
     
    - 
    simpleparse.examples.simpleexample
    index
    /home/mcfletch/pylive/simpleparse/examples/simpleexample.py
    -

    -

    - - - - - -
     
    -Modules
           
    pprint
    -

    - - - - - -
     
    -Data
           declaration = '# note use of raw string when embedding in pytho...edchar)*\nescapedchar := \'\\134"\' / \'\\134\\134\'\n'
    -parser = <simpleparse.parser.Parser instance>
    -testdata = '[test1]\n\tval=23\n\tval2="23"\n\twherefore="art thou"\n\t; why not\n\tlog = heavy_wood\n\n[test2]\nloose=lips\n\n'
    - \ No newline at end of file diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.examples.transformation.html simpleparse-2.2.0/doc/pydoc/simpleparse.examples.transformation.html --- simpleparse-2.1.0a1/doc/pydoc/simpleparse.examples.transformation.html 2006-02-19 01:05:14.000000000 +0000 +++ simpleparse-2.2.0/doc/pydoc/simpleparse.examples.transformation.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,74 +0,0 @@ - - -Python: module simpleparse.examples.transformation - - - - -
     
    - 
    simpleparse.examples.transformation
    index
    /home/mcfletch/pylive/simpleparse/examples/transformation.py
    -

    A simple example of parsing

    -I have no idea for whom I originally created this code,
    -(which was originally written for SimpleParse 1.0) nor
    -why they wanted it.  Oh well, such is life.

    -Running as a script will do some timing tests, but the
    -tests are rather... simplistic.

-The grammar is slow, parsing at around 5-10% of the speed I
    -normally expect from SimpleParse/mxTextTools parsers.
    -I'm guessing it gets into lots and lots of partial parses
    -of the "interesting" production, and that the huge number
    -of reported productions slows it down.  For example,
    -making atom non-reporting gives a 15% speedup on my
    -machine.

    -

    - - - - - -
     
    -Modules
           
    string
    -
    sys
    -

    - - - - - -
     
    -Classes
           
    -
    Emitter -
    -

    - - - - - -
     
    -class Emitter
        Methods defined here:
    -
    emit(self, tree)
    return transformation for a single tuple...
    - -
    emitexample1(self, tuple)
    *+AB+CD -> ++*AC*AD+*BC*BD
    - -
    parse(self, data)
    - -
    process(self, data)
    - -
    write(self, data)
    - -

    - - - - - -
     
    -Data
           declaration = "\nset := (interesting/multset/plusset)+\nmul...(set/atom),'++',(set/atom),(set/atom),(set/atom)\n"
    -parser = <simpleparse.parser.Parser instance>
    - \ No newline at end of file diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.examples.vrml_erronfail.html simpleparse-2.2.0/doc/pydoc/simpleparse.examples.vrml_erronfail.html --- simpleparse-2.1.0a1/doc/pydoc/simpleparse.examples.vrml_erronfail.html 2006-02-19 01:05:15.000000000 +0000 +++ simpleparse-2.2.0/doc/pydoc/simpleparse.examples.vrml_erronfail.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,49 +0,0 @@ - - -Python: module simpleparse.examples.vrml_erronfail - - - - -
     
    - 
    simpleparse.examples.vrml_erronfail
    index
    /home/mcfletch/pylive/simpleparse/examples/vrml_erronfail.py
    -

    VRML97-compliant Parser

    -This example is a full VRML97 parser, originally created
    -for the mcf.vrml VRML-processing system.  It supports all
    -VRML97 constructs, and should be correct for any VRML97
    -content you can produce.  The parser is fairly fast
    -(parsing around 280,000 cps on a 1GHz Athlon machine).

    -This is the errorOnFail version of the grammar, otherwise
    -identical to the vrml.py module.  Note: there is basically
    -no speed penalty for the errorOnFail version compared to
    -the original version, as the errorOnFail code is not touched
    -unless a syntax error is actually found in the input text.

    -

    - - - - - -
     
    -Modules
           
    simpleparse.common.chartypes
    -

    - - - - - -
     
    -Functions
           
    buildVRMLParser(declaration="header := -[\\n]*\nvrmlFile := heade... := ( [ \\011-\\015,]+ / ('#',-'\\012'*,'\\n')+ )*\n")
    -

    - - - - - -
     
    -Data
           VRMLPARSERDEF = "header := -[\\n]*\nvrmlFile := heade... := ( [ \\011-\\015,]+ / ('#',-'\\012'*,'\\n')+ )*\n"
    - \ No newline at end of file diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.examples.vrml.html simpleparse-2.2.0/doc/pydoc/simpleparse.examples.vrml.html --- simpleparse-2.1.0a1/doc/pydoc/simpleparse.examples.vrml.html 2006-02-19 01:05:14.000000000 +0000 +++ simpleparse-2.2.0/doc/pydoc/simpleparse.examples.vrml.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,35 +0,0 @@ - - -Python: module simpleparse.examples.vrml - - - - -
     
    - 
    simpleparse.examples.vrml
    index
    /home/mcfletch/pylive/simpleparse/examples/vrml.py
    -

    VRML97-compliant Parser

    -This example is a full VRML97 parser, originally created
    -for the mcf.vrml VRML-processing system.  It supports all
    -VRML97 constructs, and should be correct for any VRML97
    -content you can produce.  The parser is fairly fast
    -(parsing around 280,000 cps on a 1GHz Athlon machine).

    -

    - - - - - -
     
    -Functions
           
    buildVRMLParser(declaration="header := -[\\n]*\nrootItem := ts,(P... := ( [ \\011-\\015,]+ / ('#',-'\\012'*,'\\n')+ )*\n")
    -

    - - - - - -
     
    -Data
           VRMLPARSERDEF = "header := -[\\n]*\nrootItem := ts,(P... := ( [ \\011-\\015,]+ / ('#',-'\\012'*,'\\n')+ )*\n"
    - \ No newline at end of file diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.generator.html simpleparse-2.2.0/doc/pydoc/simpleparse.generator.html --- simpleparse-2.1.0a1/doc/pydoc/simpleparse.generator.html 2006-02-19 01:05:13.000000000 +0000 +++ simpleparse-2.2.0/doc/pydoc/simpleparse.generator.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,113 +0,0 @@ - - -Python: module simpleparse.generator - - - - -
     
    - 
    simpleparse.generator
    index
    /home/mcfletch/pylive/simpleparse/generator.py
    -

    Abstract representation of an in-memory grammar that generates parsers

    -

    - - - - - -
     
    -Modules
           
    simpleparse.stt.TextTools.TextTools
    -
    traceback
    -

    - - - - - -
     
    -Classes
           
    -
    Generator -
    GeneratorAPI1 -
    -

    - - - - - - - -
     
    -class Generator
       Abstract representation of an in-memory grammar that generates parsers

    -The generator class manages a collection of
    -ElementToken objects.  These element token objects
    -allow the generator to be separated from the
    -particular parser associated with any particular EBNF
    -grammar.  In fact, it is possible to create entire grammars
    -using only the generator objects as a python API.
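A minimal sketch of that Python-API path, assuming the element classes
documented in the objectgenerator section later in this file:

    from simpleparse.generator import Generator
    from simpleparse import objectgenerator

    g = Generator()
    spams = objectgenerator.Literal(value="spam", repeating=1)   # "spam"+
    g.addDefinition("spams", spams)
    table = g.buildParser("spams")   # TextTools parsing tuple for "spams"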
     
     Methods defined here:
    -
    __init__(self)
    Initialise the Generator
    - -
    addDefinition(self, name, rootElement)
    Add a new definition (object) to the generator
    - -
    addDefinitionSource(self, item)
    Add a source for definitions when the current grammar doesn't supply
    -a particular rule (effectively common/shared items for the grammar).
    - -
    buildParser(self, name, methodSource=None)
    Build the given parser definition, returning a TextTools parsing tuple
    - -
    cacheCustomTerminalParser(self, index, flags, parser)
    Optimization to reuse customized terminal parsers
    - -
    getCustomTerminalParser(self, index, flags)
Retrieve a cached customized terminal parser, or None
    - -
    getNameIndex(self, name)
    Return the index into the main list for the given name
    - -
    getNames(self)
    Return the list of root generator objects
    - -
    getObjectForName(self, name)
    Determine whether our methodSource has a parsing method for the given name

    -returns ( flags or 0 , tagobject)
    - -
    getParserList(self)
    - -
    getRootObject(self, name)
    Get a particular root object by name
    - -
    getRootObjects(self)
    Return the list of root generator objects
    - -
    getTagObjectForName(self, name)
    Get any explicitly defined tag object for the given name
    - -
    getTerminalParser(self, index)
    Try to retrieve a parser from the parser-list
    - -
    setTerminalParser(self, index, parser)
    Explicitly set the parser value for given name
    - -

    - - - - - - - -
     
    -class GeneratorAPI1
       Stand-in class supporting operation of SimpleParse 1.0 applications

    -There was really only the one method of interest, parserbyname,
    -everything else was internal (and is now part of
    -simpleparsegrammar.py).
     
     Methods defined here:
    -
    __init__(self, production, prebuilt=())
    - -
    parserbyname(self, name)
    Retrieve a tag-table by production name
    - -

    - - - - - -
     
    -Functions
           
    buildParser(declaration, prebuiltnodes=())
    API 1.0 primary entry point, returns a GeneratorAPI1 instance

    -That object will respond to the parserbyname API expected by
    -SimpleParse 1.0 applications.
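A minimal sketch of that 1.0-style usage (the one-production grammar is
illustrative; tag() is assumed to come from the bundled TextTools module):

    from simpleparse.generator import buildParser
    from simpleparse.stt.TextTools.TextTools import tag

    wrapper = buildParser('word := [a-z]+\n')
    table = wrapper.parserbyname("word")
    print(tag("hello world", table))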
    -
    - \ No newline at end of file diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.html simpleparse-2.2.0/doc/pydoc/simpleparse.html --- simpleparse-2.1.0a1/doc/pydoc/simpleparse.html 2006-02-19 01:05:11.000000000 +0000 +++ simpleparse-2.2.0/doc/pydoc/simpleparse.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,32 +0,0 @@ - - -Python: package simpleparse - - - - -
     
    - 
    simpleparse
    index
    /home/mcfletch/pylive/simpleparse/__init__.py
    -

    Simple parsing using mxTextTools

    -See the /doc subdirectory for introductory and
    -general documentation.  See license.txt for licensing
    -information.  (This is a BSD-licensed package).

    -

    - - - - - -
     
    -Package Contents
           __init__ -- Simple parsing using mxTextTools
    baseparser -- Base class for real-world parsers (such as parser.Parser)
    common (package) -- Common (library) definitions
    dispatchprocessor -- Dispatch-processor API
    error -- Definition of the ParserSyntaxError raised on parse failure
    examples (package) -- Examples of use for the SimpleParse parser generator
    generator -- Abstract representation of an in-memory grammar that generates parsers
    objectgenerator -- Object-oriented tag-table generator objects
    parser -- Real-world parsers using the SimpleParse EBNF
    printers -- Utility to print Python code for a given generator object's element tokens
    processor -- Definitions of the MethodSource and Processor APIs
    setup -- Installs SimpleParse using distutils
    setupstt -- Distutils Extensions needed for the mx Extensions.
    simpleparsegrammar -- Default SimpleParse EBNF grammar as a generator with productions
stt (package) -- SimpleParse's built-in version of the mxTextTools text-processing engine
    tests (package) -- Package of test scripts, is a package to make setup.py include it :)
    xml (package) -- XML Parsing package

    - - - - - -
     
    -Data
           __path__ = ['/home/mcfletch/pylive/simpleparse']
    - \ No newline at end of file diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.__init__.html simpleparse-2.2.0/doc/pydoc/simpleparse.__init__.html --- simpleparse-2.1.0a1/doc/pydoc/simpleparse.__init__.html 2006-02-19 01:05:12.000000000 +0000 +++ simpleparse-2.2.0/doc/pydoc/simpleparse.__init__.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,18 +0,0 @@ - - -Python: module simpleparse.__init__ - - - - -
     
    - 
    simpleparse.__init__
    index
    /home/mcfletch/pylive/simpleparse/__init__.py
    -

    Simple parsing using mxTextTools

    -See the /doc subdirectory for introductory and
    -general documentation.  See license.txt for licensing
    -information.  (This is a BSD-licensed package).

    - - \ No newline at end of file diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.objectgenerator.html simpleparse-2.2.0/doc/pydoc/simpleparse.objectgenerator.html --- simpleparse-2.1.0a1/doc/pydoc/simpleparse.objectgenerator.html 2006-02-19 01:05:13.000000000 +0000 +++ simpleparse-2.2.0/doc/pydoc/simpleparse.objectgenerator.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,1190 +0,0 @@ - - -Python: module simpleparse.objectgenerator - - - - -
     
    - 
    simpleparse.objectgenerator
    index
    /home/mcfletch/pylive/simpleparse/objectgenerator.py
    -

    Object-oriented tag-table generator objects

    -The objectgenerator module is the core of the SimpleParse
    -system, the various element token classes defined here
    -implement transitions from EBNF-style abstractions into
    -the low-level (assembly-like) instructions to the
    -TextTools engine.

    -Each class within the module is a sub-class of ElementToken,
    -which provides a number of common facilities, the most
    -obvious of which is the permute method, which takes care of
    -the negative, optional, and repeating flags for the normal
    -case (with character ranges and literals being non-normal).

    -

    - - - - - -
     
    -Modules
           
    copy
    -simpleparse.stt.TextTools.mxTextTools.mxTextTools
    -
    string
    -time
    -
    types
    -

    - - - - - -
     
    -Classes
           
    -
    ElementToken -
    -
    -
    ErrorOnFail -
    Group -
    -
    -
    FirstOfGroup -
    SequentialGroup -
    -
    -
    CILiteral -
    -
    -
    -
    -
    LibraryElement -
    Literal -
    Name -
    Prebuilt -
    _Range -
    -
    -
    Range -
    -
    -
    -
    -
    -

    - - - - - - - -
     
    -class CILiteral(SequentialGroup)
       Case-insensitive Literal values

    -The CILiteral is a sequence of literal and
    -character-range values, where each element is
    -positive and required.  Literal values are
    -composed of those characters which are not
    -upper-case/lower-case pairs, while the ranges
    -are all two-character ranges with the upper
    -and lower forms.

    -CILiterals in the SimpleParse EBNF grammar are defined like so:
    -        c"test", c"test"?, c"test"*, c"test"+
    -        -c"test", -c"test"?, -c"test"*, -c"test"+

    -Attributes:
    -        value -- a string storing the literal's value

    -Notes:
    -        Currently we don't support Unicode literals

    -        A CILiteral will be *much* slower than a
    -        regular literal or character range
     
     
    Method resolution order:
    -
    CILiteral
    -
    SequentialGroup
    -
    Group
    -
    ElementToken
    -
    -
    -Methods defined here:
    -
    ciParse(self, value)
    Break value into set of case-dependent groups...
    - -
    toParser(self, generator=None, noReport=0)
    - -
    -Data and other attributes defined here:
    -
    value = ''
    - -
    -Methods inherited from Group:
    -
    terminal(self, generator)
    Determine if this element is terminal for the generator
    - -
    -Data and other attributes inherited from Group:
    -
    children = ()
    - -
    terminalValue = None
    - -
    -Methods inherited from ElementToken:
    -
    __init__(self, **namedarguments)
    Initialize the object with named attributes

    -This method simply takes the named attributes and
    -updates the object's dictionary with them
    - -
    __repr__(self)
    Return a readily recognisable version of ourself
    - -
    permute(self, basetable)
    Given a positive, required, non-repeating table, convert to appropriately configured table

    -This method applies generic logic for applying the
    -operational flags to a basic recipe for an element.

    -It is normally called from the elements-token's own
    -toParser method.
    - -
    -Data and other attributes inherited from ElementToken:
    -
    errorOnFail = None
    - -
    expanded = 0
    - -
    lookahead = 0
    - -
    negative = 0
    - -
    optional = 0
    - -
    repeating = 0
    - -
    report = 1
    - -

    - - - - - - - -
     
    -class ElementToken
       Abstract base class for all ElementTokens

    -Common Attributes:

    -        negative -- the element token should match
    -                a character if the "base" definition
    -                would not match at the current position
    -        optional -- the element token will match even
    -                if the base definition would not match
    -                at the current position
    -        repeating -- if the element is successfully
    -                matched, attempt to match it again.
    -        lookahead -- if true, the scanning position
    -                of the engine will be reset after the
    -                element matches
    -        errorOnFail -- if true, the engine will call the
    -                object stored in errorOnFail as a text-
    -                matching object iff the element token fails
    -                to match.  This is used to signal
    -                SyntaxErrors.
    -                
    -Attributes only used for top-level Productions:

    -        report -- if true, the production's results
    -                will be added to the result tree
    -        expanded -- if true, the production's children's
    -                results will be added to the result tree
    -                but the production's own result will be ignored
     
     Methods defined here:
    -
    __init__(self, **namedarguments)
    Initialize the object with named attributes

    -This method simply takes the named attributes and
    -updates the object's dictionary with them
    - -
    __repr__(self)
    Return a readily recognisable version of ourself
    - -
    permute(self, basetable)
    Given a positive, required, non-repeating table, convert to appropriately configured table

    -This method applies generic logic for applying the
    -operational flags to a basic recipe for an element.

    -It is normally called from the elements-token's own
    -toParser method.
    - -
    terminal(self, generator)
    Determine if this element is terminal for the generator
    - -
    toParser(self, generator, noReport=0)
    Abstract interface for implementing the conversion to a text-tools table

    -generator -- an instance of generator.Generator
    -        which provides various facilities for discovering
    -        other productions.
    -noReport -- if true, we're being called recursively
    -        for a terminal grammar fragment where one of our
    -        parents has explicitly suppressed all reporting.

    -This method is called by the generator or by
    -another element-token's toParser method.
    - -
    -Data and other attributes defined here:
    -
    errorOnFail = None
    - -
    expanded = 0
    - -
    lookahead = 0
    - -
    negative = 0
    - -
    optional = 0
    - -
    repeating = 0
    - -
    report = 1
    - -

    - - - - - - - -
     
    -class ErrorOnFail(ElementToken)
       When called as a matching function, raises a SyntaxError

    -Attributes:
    -        expected -- list of strings describing expected productions
    -        production -- string name of the production that's failing to parse
    -        message -- overrides default message generation if non-null


    -(something,something)+!
    -(something,something)!
    -(something,something)+!"Unable to parse somethings in my production"
    -(something,something)!"Unable to parse somethings in my production"

    -if string -> give an explicit message (with optional % values)
    -else -> use a default string
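A hedged grammar sketch following the forms listed above (the declaration is
illustrative, and the exact exception raised is not asserted here):

    from simpleparse.parser import Parser

    decl = (
        'assignment := name, ("=", value)!"expected = and a value"\n'
        'name       := [a-z]+\n'
        'value      := [0-9]+\n'
    )
    p = Parser(decl, "assignment")
    print(p.parse("a=42"))
    # p.parse("a=") should fail with a syntax error carrying the message above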
     
     Methods defined here:
    -
    __call__(self, text, position, end)
    Method called by mxTextTools iff the base production fails
    - -
    copy(self)
    - -
    -Data and other attributes defined here:
    -
    expected = ''
    - -
    message = ''
    - -
    production = ''
    - -
    -Methods inherited from ElementToken:
    -
    __init__(self, **namedarguments)
    Initialize the object with named attributes

    -This method simply takes the named attributes and
    -updates the object's dictionary with them
    - -
    __repr__(self)
    Return a readily recognisable version of ourself
    - -
    permute(self, basetable)
    Given a positive, required, non-repeating table, convert to appropriately configured table

    -This method applies generic logic for applying the
    -operational flags to a basic recipe for an element.

    -It is normally called from the elements-token's own
    -toParser method.
    - -
    terminal(self, generator)
    Determine if this element is terminal for the generator
    - -
    toParser(self, generator, noReport=0)
    Abstract interface for implementing the conversion to a text-tools table

    -generator -- an instance of generator.Generator
    -        which provides various facilities for discovering
    -        other productions.
    -noReport -- if true, we're being called recursively
    -        for a terminal grammar fragment where one of our
    -        parents has explicitly suppressed all reporting.

    -This method is called by the generator or by
    -another element-token's toParser method.
    - -
    -Data and other attributes inherited from ElementToken:
    -
    errorOnFail = None
    - -
    expanded = 0
    - -
    lookahead = 0
    - -
    negative = 0
    - -
    optional = 0
    - -
    repeating = 0
    - -
    report = 1
    - -

    - - - - - - - -
     
    -class FirstOfGroup(Group)
       Set of tokens that matches (and stops searching) with the first successful child

    -A FirstOf group attempts to match each child in turn,
    -declaring success with the first successful child,
    -or failure if none of the children match.

    -Within the simpleparsegrammar, the FirstOf group
    -is defined like so:
    -        ("a" / b / c / "d")
    -i.e. a series of slash-separated element token definitions.
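A minimal sketch of the object form of that notation, using the value/children
attributes documented for Literal, Name and Group in this file:

    from simpleparse import objectgenerator

    group = objectgenerator.FirstOfGroup(children=[
        objectgenerator.Literal(value="a"),
        objectgenerator.Name(value="b"),   # reference to production "b"
        objectgenerator.Literal(value="d"),
    ])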
     
     
    Method resolution order:
    -
    FirstOfGroup
    -
    Group
    -
    ElementToken
    -
    -
    -Methods defined here:
    -
    toParser(self, generator=None, noReport=0)
    - -
    -Methods inherited from Group:
    -
    terminal(self, generator)
    Determine if this element is terminal for the generator
    - -
    -Data and other attributes inherited from Group:
    -
    children = ()
    - -
    terminalValue = None
    - -
    -Methods inherited from ElementToken:
    -
    __init__(self, **namedarguments)
    Initialize the object with named attributes

    -This method simply takes the named attributes and
    -updates the object's dictionary with them
    - -
    __repr__(self)
    Return a readily recognisable version of ourself
    - -
    permute(self, basetable)
    Given a positive, required, non-repeating table, convert to appropriately configured table

    -This method applies generic logic for applying the
    -operational flags to a basic recipe for an element.

    -It is normally called from the elements-token's own
    -toParser method.
    - -
    -Data and other attributes inherited from ElementToken:
    -
    errorOnFail = None
    - -
    expanded = 0
    - -
    lookahead = 0
    - -
    negative = 0
    - -
    optional = 0
    - -
    repeating = 0
    - -
    report = 1
    - -

    - - - - - - - -
     
    -class Group(ElementToken)
       Abstract base class for all group element tokens

    -The primary feature of a group is that it has a set
    -of element tokens stored in the attribute "children".
     
     Methods defined here:
    -
    terminal(self, generator)
    Determine if this element is terminal for the generator
    - -
    -Data and other attributes defined here:
    -
    children = ()
    - -
    terminalValue = None
    - -
    -Methods inherited from ElementToken:
    -
    __init__(self, **namedarguments)
    Initialize the object with named attributes

    -This method simply takes the named attributes and
    -updates the object's dictionary with them
    - -
    __repr__(self)
    Return a readily recognisable version of ourself
    - -
    permute(self, basetable)
    Given a positive, required, non-repeating table, convert to appropriately configured table

    -This method applies generic logic for applying the
    -operational flags to a basic recipe for an element.

    -It is normally called from the elements-token's own
    -toParser method.
    - -
    toParser(self, generator, noReport=0)
    Abstract interface for implementing the conversion to a text-tools table

    -generator -- an instance of generator.Generator
    -        which provides various facilities for discovering
    -        other productions.
    -noReport -- if true, we're being called recursively
    -        for a terminal grammar fragment where one of our
    -        parents has explicitly suppressed all reporting.

    -This method is called by the generator or by
    -another element-token's toParser method.
    - -
    -Data and other attributes inherited from ElementToken:
    -
    errorOnFail = None
    - -
    expanded = 0
    - -
    lookahead = 0
    - -
    negative = 0
    - -
    optional = 0
    - -
    repeating = 0
    - -
    report = 1
    - -

    - - - - - - - -
     
    -class LibraryElement(ElementToken)
   Holder for a prebuilt item with its own generator
     
     Methods defined here:
    -
    toParser(self, generator=None, noReport=0)
    - -
    -Data and other attributes defined here:
    -
    generator = None
    - -
    methodSource = None
    - -
    production = ''
    - -
    -Methods inherited from ElementToken:
    -
    __init__(self, **namedarguments)
    Initialize the object with named attributes

    -This method simply takes the named attributes and
    -updates the object's dictionary with them
    - -
    __repr__(self)
    Return a readily recognisable version of ourself
    - -
    permute(self, basetable)
    Given a positive, required, non-repeating table, convert to appropriately configured table

    -This method applies generic logic for applying the
    -operational flags to a basic recipe for an element.

    -It is normally called from the elements-token's own
    -toParser method.
    - -
    terminal(self, generator)
    Determine if this element is terminal for the generator
    - -
    -Data and other attributes inherited from ElementToken:
    -
    errorOnFail = None
    - -
    expanded = 0
    - -
    lookahead = 0
    - -
    negative = 0
    - -
    optional = 0
    - -
    repeating = 0
    - -
    report = 1
    - -

    - - - - - - - -
     
    -class Literal(ElementToken)
       Literal string value to be matched

    -Literals are one of the most common elements within
    -any grammar.  The implementation tries to use the
    -most efficient mechanism available for matching/searching
    -for a literal value, so the Literal class does not
    -use the permute method, instead defining explicit
    -parsing methodologies for each flag and value combination

    -Literals in the SimpleParse EBNF grammar are defined like so:
    -        "test", "test"?, "test"*, "test"+
    -        -"test", -"test"?, -"test"*, -"test"+

    -Attributes:
    -        value -- a string storing the literal's value

    -Notes:
    -        Currently we don't support Unicode literals
    -        
    -See also:
    -        CILiteral -- case-insensitive Literal values
     
     Methods defined here:
    -
    baseToParser(self, generator=None)
    Parser generation without considering flag settings
    - -
    terminal(self, generator)
    Determine if this element is terminal for the generator
    - -
    toParser(self, generator=None, noReport=0)
    Create the parser for the element token
    - -
    -Data and other attributes defined here:
    -
    value = ''
    - -
    -Methods inherited from ElementToken:
    -
    __init__(self, **namedarguments)
    Initialize the object with named attributes

    -This method simply takes the named attributes and
    -updates the object's dictionary with them
    - -
    __repr__(self)
    Return a readily recognisable version of ourself
    - -
    permute(self, basetable)
    Given a positive, required, non-repeating table, convert to appropriately configured table

    -This method applies generic logic for applying the
    -operational flags to a basic recipe for an element.

    -It is normally called from the elements-token's own
    -toParser method.
    - -
    -Data and other attributes inherited from ElementToken:
    -
    errorOnFail = None
    - -
    expanded = 0
    - -
    lookahead = 0
    - -
    negative = 0
    - -
    optional = 0
    - -
    repeating = 0
    - -
    report = 1
    - -

    - - - - - - - -
     
    -class Name(ElementToken)
       Reference to another rule in the grammar

    -The Name element token allows you to reference another
    -production within the grammar.  There are three major
    -sub-categories of reference depending on both the Name
    -element token and the referenced table's values.

    -if the Name token's report attribute is false,
    -or the target table's report attribute is false,
    -or the Name token negative attribute is true,
    -        the Name reference will report nothing in the result tree

    -if the target's expand attribute is true, however,
    -        the Name reference will report the children
    -        of the target production without reporting the
    -        target production's results (SubTable match)

    -finally:
    -        if the target is not expanded and the Name token
    -        should report something, the generator object is
    -        asked to supply the tag object and flags for
    -        processing the results of the target.  See the
    -        generator.MethodSource documentation for details.

    -Notes:
-        expanded and un-reported productions won't get any
-        methodSource methods called when they are finished;
-        that's just how I decided to do it, and I'm not sure
-        whether there's some case where you'd want it.  As a
-        result, it's possible to have a
    -        method getting called for one instance (where a
    -        name ref is reporting) and not for another (where
    -        the name ref isn't reporting).
     
     Methods defined here:
    -
    terminal(self, generator)
    Determine if this element is terminal for the generator
    - -
    toParser(self, generator, noReport=0)
    Create the table for parsing a name-reference

    -Note that currently most of the "compression" optimisations
    -occur here.
    - -
    -Data and other attributes defined here:
    -
    report = 1
    - -
    terminalValue = None
    - -
    value = ''
    - -
    -Methods inherited from ElementToken:
    -
    __init__(self, **namedarguments)
    Initialize the object with named attributes

    -This method simply takes the named attributes and
    -updates the object's dictionary with them
    - -
    __repr__(self)
    Return a readily recognisable version of ourself
    - -
    permute(self, basetable)
    Given a positive, required, non-repeating table, convert to appropriately configured table

    -This method applies generic logic for applying the
    -operational flags to a basic recipe for an element.

    -It is normally called from the elements-token's own
    -toParser method.
    - -
    -Data and other attributes inherited from ElementToken:
    -
    errorOnFail = None
    - -
    expanded = 0
    - -
    lookahead = 0
    - -
    negative = 0
    - -
    optional = 0
    - -
    repeating = 0
    - -

    - - - - - - - -
     
    -class Prebuilt(ElementToken)
       Holder for pre-built TextTools tag tables

    -You can pass in a Pre-built tag table when
    -creating your grammar, doing so creates
    -Prebuilt element tokens which can be referenced
    -by the other element tokens in your grammar.
     
     Methods defined here:
    -
    toParser(self, generator=None, noReport=0)
    - -
    -Data and other attributes defined here:
    -
    value = ()
    - -
    -Methods inherited from ElementToken:
    -
    __init__(self, **namedarguments)
    Initialize the object with named attributes

    -This method simply takes the named attributes and
    -updates the object's dictionary with them
    - -
    __repr__(self)
    Return a readily recognisable version of ourself
    - -
    permute(self, basetable)
    Given a positive, required, non-repeating table, convert to appropriately configured table

    -This method applies generic logic for applying the
    -operational flags to a basic recipe for an element.

    -It is normally called from the element token's own
    -toParser method.
    - -
    terminal(self, generator)
    Determine if this element is terminal for the generator
    - -
    -Data and other attributes inherited from ElementToken:
    -
    errorOnFail = None
    - -
    expanded = 0
    - -
    lookahead = 0
    - -
    negative = 0
    - -
    optional = 0
    - -
    repeating = 0
    - -
    report = 1
    - -

    - - - - - - - -
     
    -class Range(_Range)
       Range type which doesn't use the CharSet features in mx.TextTools

    -This is likely to be much slower than the CharSet version (below), and
    -is unable to handle unicode character sets.  However, it will work with
    -TextTools 2.0.3, which may be needed in some cases.
     
     
    Method resolution order:
    -
    Range
    -
    _Range
    -
    ElementToken
    -
    -
    -Methods defined here:
    -
    baseToParser(self, generator=None)
    Parser generation without considering flag settings
    - -
    terminal(self, generator)
    Determine if this element is terminal for the generator
    - -
    -Methods inherited from _Range:
    -
    toParser(self, generator=None, noReport=0)
    Create the parser for the element token
    - -
    -Data and other attributes inherited from _Range:
    -
    requiresExpandedSet = 1
    - -
    value = ''
    - -
    -Methods inherited from ElementToken:
    -
    __init__(self, **namedarguments)
    Initialize the object with named attributes

    -This method simply takes the named attributes and
    -updates the object's dictionary with them
    - -
    __repr__(self)
    Return a readily recognisable version of ourself
    - -
    permute(self, basetable)
    Given a positive, required, non-repeating table, convert to appropriately configured table

    -This method applies generic logic for applying the
    -operational flags to a basic recipe for an element.

    -It is normally called from the element token's own
    -toParser method.
    - -
    -Data and other attributes inherited from ElementToken:
    -
    errorOnFail = None
    - -
    expanded = 0
    - -
    lookahead = 0
    - -
    negative = 0
    - -
    optional = 0
    - -
    repeating = 0
    - -
    report = 1
    - -

    - - - - - - - -
     
    -class SequentialGroup(Group)
       A sequence of element tokens which must match in a particular order

    -A sequential group must match each child in turn
    -and all children must be satisfied to consider the
    -group matched.

    -Within the simpleparsegrammar, the sequential group
    -is defined like so:
    -        ("a", b, c, "d")
    -i.e. a series of comma-separated element token definitions.
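
A short sketch (grammar and sample input are illustrative assumptions): the comma-separated children of 'assignment' below form a sequential group, so every child must match, in order, for the production to succeed.

from simpleparse.parser import Parser

decl = r'''
assignment := name, ws, "=", ws, value
name       := [a-zA-Z]+
value      := [0-9]+
ws         := [ \t]*
'''
parser = Parser(decl, 'assignment')
print(parser.parse('answer = 42'))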
     
     
    Method resolution order:
    -
    SequentialGroup
    -
    Group
    -
    ElementToken
    -
    -
    -Methods defined here:
    -
    toParser(self, generator=None, noReport=0)
    - -
    -Methods inherited from Group:
    -
    terminal(self, generator)
    Determine if this element is terminal for the generator
    - -
    -Data and other attributes inherited from Group:
    -
    children = ()
    - -
    terminalValue = None
    - -
    -Methods inherited from ElementToken:
    -
    __init__(self, **namedarguments)
    Initialize the object with named attributes

    -This method simply takes the named attributes and
    -updates the object's dictionary with them
    - -
    __repr__(self)
    Return a readily recognisable version of ourself
    - -
    permute(self, basetable)
    Given a positive, required, non-repeating table, convert to appropriately configured table

    -This method applies generic logic for applying the
    -operational flags to a basic recipe for an element.

    -It is normally called from the element token's own
    -toParser method.
    - -
    -Data and other attributes inherited from ElementToken:
    -
    errorOnFail = None
    - -
    expanded = 0
    - -
    lookahead = 0
    - -
    negative = 0
    - -
    optional = 0
    - -
    repeating = 0
    - -
    report = 1
    - -

    - - - - - - - -
     
    -class _Range(ElementToken)
       Range of character values where any one of the characters may match

    -The Range token allows you to define a set of characters
    -(using a mini-grammar) of which any one may match.  By using
    -the repetition flags, it is possible to easily create such
    -common structures as "names" and "numbers".  For example:

    -        name := [a-zA-Z]+
    -        number := [0-9.eE]+

    -(Note: those are not beautifully defined examples :) ).

    -The mini-grammar for the simpleparsegrammar is defined as follows:

    -        '[',CHARBRACE?,CHARDASH?, (CHARRANGE/CHARNOBRACE)*, CHARDASH?,']'
    -        
    -that is, if a literal ']' character is wanted, you must
    -define the character as the first item in the range.  A literal
    -'-' character must appear as the first character after any
    -literal ']' character (or the beginning of the range) or as the
    -last character in the range.

    -Note: The expansion from the mini-grammar occurs before the
    -Range token is created (the simpleparse grammar does the
    -expansion), so the value attribute of the token is actually
    -the expanded string of characters.
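
A minimal sketch of range productions following the ordering rules above (the grammar and sample input are illustrative assumptions): ']' appears first inside the brackets and '-' immediately after it, so both are taken literally.

from simpleparse.parser import Parser

decl = r'''
root   := (name / number / punct)+
name   := [a-zA-Z]+
number := [0-9.eE]+
punct  := []-]
'''
parser = Parser(decl, 'root')
print(parser.parse('abc-123]'))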
     
     Methods defined here:
    -
    toParser(self, generator=None, noReport=0)
    Create the parser for the element token
    - -
    -Data and other attributes defined here:
    -
    requiresExpandedSet = 1
    - -
    value = ''
    - -
    -Methods inherited from ElementToken:
    -
    __init__(self, **namedarguments)
    Initialize the object with named attributes

    -This method simply takes the named attributes and
    -updates the object's dictionary with them
    - -
    __repr__(self)
    Return a readily recognisable version of ourself
    - -
    permute(self, basetable)
    Given a positive, required, non-repeating table, convert to appropriately configured table

    -This method applies generic logic for applying the
    -operational flags to a basic recipe for an element.

    -It is normally called from the element token's own
    -toParser method.
    - -
    terminal(self, generator)
    Determine if this element is terminal for the generator
    - -
    -Data and other attributes inherited from ElementToken:
    -
    errorOnFail = None
    - -
    expanded = 0
    - -
    lookahead = 0
    - -
    negative = 0
    - -
    optional = 0
    - -
    repeating = 0
    - -
    report = 1
    - -

    - - - - - -
     
    -Functions
           
    BMS = TextSearch(...)
    TextSearch(match[,translate=None,algorithm=default_algorithm])

    -Create a substring search object for the string match;
    -translate is an optional translate-string like the one used
    -in the module re.
    -
    CharSet(...)
    CharSet(definition)

    -Create a character set matching object from the string
    -
    FS = TextSearch(...)
    TextSearch(match[,translate=None,algorithm=default_algorithm])

    -Create a substring search object for the string match;
    -translate is an optional translate-string like the one used
    -in the module re.
    -
    FSType = TextSearch(...)
    TextSearch(match[,translate=None,algorithm=default_algorithm])

    -Create a substring search object for the string match;
    -translate is an optional translate-string like the one used
    -in the module re.
    -
    TagTable(...)
    TagTable(definition[,cachable=1])
    -
    TextSearch(...)
    TextSearch(match[,translate=None,algorithm=default_algorithm])

    -Create a substring search object for the string match;
    -translate is an optional translate-string like the one used
    -in the module re.
    -
    UnicodeTagTable(...)
    TagTable(definition[,cachable=1])
    -
    charsplit(...)
    charsplit(text,char,start=0,stop=len(text))

    -Split text[start:stop] into substrings at char and
    -return the result as list of strings.
    -
    cmp(...)
    cmp(a,b)

    -Compare two valid taglist tuples with respect to their slice
    -position; this is useful for sorting joinlists.
    -
    compositeFlags(first, second, report=1)
    Composite flags from two items into overall flag-set
    -
    copyToNewFlags(target, flags)
    Copy target using combined flags
    -
    extractFlags(item, report=1)
    Extract the flags from an item as a tuple
    -
    hex2str(...)
    hex2str(text)

    -Return text interpreted as two byte HEX values converted
    -to a string.
    -
    isascii(...)
    isascii(text,start=0,stop=len(text))

    -Return 1/0 depending on whether text only contains ASCII
    -characters.
    -
    join(...)
    join(joinlist,sep='',start=0,stop=len(joinlist))

    -Copy snippets from different strings together producing a
    -new string
    -The first argument must be a list of tuples or strings;
    -tuples must be of the form (string,l,r[,...]) and turn out
    -as string[l:r]
    -NOTE: the syntax used for negative slices is different
    -than the Python standard: -1 corresponds to the first
    -character *after* the string, e.g. ('Example',0,-1) gives
    -'Example' and not 'Exampl', like in Python
    -sep is an optional separator string, start and stop
    -define the slice of joinlist that is taken into account.
    -
    joinlist(...)
    joinlist(text,list,start=0,stop=len(text))

    -Takes a list of tuples (replacement,l,r,...) and produces
    -a taglist suitable for join() which creates a copy
    -of text where every slice [l:r] is replaced by the
    -given replacement
    -- the list must be sorted using cmp() as compare function
    -- it may not contain overlapping slices
    -- the slices may not contain negative indices
    -- if the taglist cannot contain overlapping slices, you can
    -  give this function the taglist produced by tag() directly
    -  (sorting is not needed, as the list will already be sorted)
    -- start and stop set the slice to work in, i.e. text[start:stop]
    -
    lower(...)
    lower(text)

    -Return text converted to lower case.
    -
    prefix(...)
    prefix(text,prefixes,start=0,stop=len(text)[,translate])

    -Looks at text[start:stop] and returns the first matching
    -prefix out of the tuple of strings given in prefixes.
    -If no prefix is found to be matching, None is returned.
    -The optional 256 char translate string is used to translate
    -the text prior to comparing it with the given prefixes.
    -
    set(...)
    set(string,logic=1)

    -Returns a character set for string: a bit encoded version
    -of the characters occurring in string.
    -- logic can be set to 0 if all characters *not* in string
    -  should go into the set
    -
    setfind(...)
    setfind(text,set,start=0,stop=len(text))

    -Find the first occurrence of any character from set in
    -text[start:stop]
    - set must be a string obtained with set()
    -DEPRECATED: use CharSet().search() instead.
    -
    setsplit(...)
    setsplit(text,set,start=0,stop=len(text))

    -Split text[start:stop] into substrings using set,
    -omitting the splitting parts and empty substrings.
    -set must be a string obtained from set()
    -DEPRECATED: use CharSet().split() instead.
    -
    setsplitx(...)
    setsplitx(text,set,start=0,stop=len(text))

    -Split text[start:stop] into substrings using set, so
    -that every second entry consists only of characters in set.
    -set must be a string obtained with set()
    -DEPRECATED: use CharSet().splitx() instead.
    -
    setstrip(...)
    setstrip(text,set,start=0,stop=len(text),mode=0)

    -Strip all characters in text[start:stop] appearing in set.
    -mode indicates where to strip (<0: left; =0: left and right;
    ->0: right). set must be a string obtained with set()
    -DEPRECATED: use CharSet().strip() instead.
    -
    splitat(...)
    splitat(text,char,nth=1,start=0,stop=len(text))

    -Split text[start:stop] into two substrings at the nth
    -occurrence of char and return the result as a 2-tuple. If the
    -character is not found, the second string is empty. nth may
    -be negative: the search is then done from the right and the
    -first string is empty in case the character is not found.
    -
    str2hex(...)
    str2hex(text)

    -Return text converted to a string consisting of two byte
    -HEX values.
    -
    suffix(...)
    suffix(text,suffixes,start=0,stop=len(text)[,translate])

    -Looks at text[start:stop] and returns the first matching
    -suffix out of the tuple of strings given in suffixes.
    -If no suffix is found to be matching, None is returned.
    -The optional 256 char translate string is used to translate
    -the text prior to comparing it with the given suffixes.
    -
    tag(...)
    tag(text,tagtable,sliceleft=0,sliceright=len(text),taglist=[],context=None) 
    -Produce a tag list for a string, given a tag-table
    -- returns a tuple (success, taglist, nextindex)
    -- if taglist == None, then no taglist is created
    -
    upper(...)
    upper(text)

    -Return text converted to upper case.
    -
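
A hedged sketch combining a few of the functions listed above, using the module path shown elsewhere in this package; the sample text, table and expected outputs are assumptions for demonstration only.

from simpleparse.stt.TextTools.TextTools import AllIn, prefix, setsplit, tag
from simpleparse.stt.TextTools.TextTools import set as make_set

text = 'alpha beta 42'

whitespace = make_set(' \t')            # bit-encoded character set
print(setsplit(text, whitespace))       # expected: ['alpha', 'beta', '42']
print(prefix(text, ('beta', 'alpha')))  # expected: 'alpha'

# tag(): run a one-entry tag table over the text.
table = (('word', AllIn, 'abcdefghijklmnopqrstuvwxyz'),)
print(tag(text, table))                 # expected: (1, [('word', 0, 5, None)], 5)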

    - - - - - -
     
    -Data
           A2Z = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    -A2Z_charset = <Character Set object for 'A-Z'>
    -A2Z_set = '\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -AllIn = 11
    -AllInCharSet = 41
    -AllInSet = 31
    -AllNotIn = 12
    -AppendMatch = 2048
    -AppendTagobj = 1024
    -AppendToTagobj = 512
    -BOYERMOORE = 0
    -Break = 0
    -Call = 201
    -CallArg = 202
    -CallTag = 256
    -EOF = 101
    -FASTSEARCH = 1
    -Fail = 100
    -Here = 1
    -Is = 13
    -IsIn = 14
    -IsInCharSet = 42
    -IsInSet = 32
    -IsNot = 15
    -IsNotIn = 15
    -Jump = 100
    -JumpTarget = 104
    -LookAhead = 4096
    -Loop = 205
    -LoopControl = 206
    -MatchFail = -1000000
    -MatchOk = 1000000
    -Move = 103
    -NoWord = 211
    -Reset = -1
    -Skip = 102
    -SubTable = 207
    -SubTableInList = 208
    -TRIVIAL = 2
    -Table = 203
    -TableInList = 204
    -ThisTable = 999
    -To = 0
    -ToBOF = 0
    -ToEOF = -1
    -Umlaute = '\xc4\xd6\xdc'
    -Umlaute_charset = <Character Set object for '\xc4\xd6\xdc'>
    -Word = 21
    -WordEnd = 23
    -WordStart = 22
    -a2z = 'abcdefghijklmnopqrstuvwxyz'
    -a2z_charset = <Character Set object for 'a-z'>
    -a2z_set = '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -alpha = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
    -alpha_charset = <Character Set object for 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'>
    -alpha_set = '\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -alphanumeric = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'
    -alphanumeric_charset = <Character Set object for 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'>
    -alphanumeric_set = '\x00\x00\x00\x00\x00\x00\xff\x03\xfe\xff\xff\x07\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -any = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./...\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'
    -any_charset = <Character Set object for '\x00-\xff'>
    -any_set = '\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff'
    -formfeed = '\x0c'
    -formfeed_charset = <Character Set object for '\x0c'>
    -german_alpha = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\xe4\xf6\xfc\xdf\xc4\xd6\xdc'
    -german_alpha_charset = <Character Set object for 'ABCDEFGHIJKLMNOPQRSTU...hijklmnopqrstuvwxyz\xe4\xf6\xfc\xdf\xc4\xd6\xdc'>
    -german_alpha_set = '\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00@\x90\x10\x00@\x10'
    -id2cmd = {-1000000: 'MatchFail', -1: 'ToEOF', 0: 'Fail/Jump', 1: 'Here', 11: 'AllIn', 12: 'AllNotIn', 13: 'Is', 14: 'IsIn', 15: 'IsNotIn', 21: 'Word', ...}
    -newline = '\r\n'
    -newline_charset = <Character Set object for '\r\n'>
    -newline_set = '\x00$\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -nonwhitespace_charset = <Character Set object for '^ \t\x0b\r\n\x0c'>
    -nonwhitespace_set = '\xff\xc1\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff'
    -number = '0123456789'
    -number_charset = <Character Set object for '0-9'>
    -number_set = '\x00\x00\x00\x00\x00\x00\xff\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -sFindWord = 213
    -sWordEnd = 212
    -sWordStart = 211
    -tagtable_cache = {(46912536021760, 0): <String Tag Table object>, (46912540134840, 0): <String Tag Table object>, (46912541410080, 0): <String Tag Table object>, (46912541454848, 0): <String Tag Table object>, (46912541455136, 0): <String Tag Table object>, (46912541455208, 0): <String Tag Table object>, (46912541489264, 0): <String Tag Table object>, (46912541566016, 0): <String Tag Table object>, (46912543903688, 0): <String Tag Table object>, (46912543908136, 0): <String Tag Table object>, ...}
    -to_lower = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./...\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'
    -to_upper = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./...\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'
    -umlaute = '\xe4\xf6\xfc\xdf'
    -umlaute_charset = <Character Set object for '\xe4\xf6\xfc\xdf'>
    -white = ' \t\x0b'
    -white_charset = <Character Set object for ' \t\x0b'>
    -white_set = '\x00\x02\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -whitespace = ' \t\x0b\r\n\x0c'
    -whitespace_charset = <Character Set object for ' \t\x0b\r\n\x0c'>
    -whitespace_set = '\x00&\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
\ No newline at end of file
diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.parser.html simpleparse-2.2.0/doc/pydoc/simpleparse.parser.html
--- simpleparse-2.1.0a1/doc/pydoc/simpleparse.parser.html 2006-02-19 01:05:13.000000000 +0000
+++ simpleparse-2.2.0/doc/pydoc/simpleparse.parser.html 1970-01-01 00:00:00.000000000 +0000
@@ -1,298 +0,0 @@
-Python: module simpleparse.parser
     
    - 
    simpleparse.parser
    index
    /home/mcfletch/pylive/simpleparse/parser.py
    -

    Real-world parsers using the SimpleParse EBNF

    -

    - - - - - -
     
    -Modules
           
    simpleparse.baseparser
    -
    simpleparse.common
    -
    simpleparse.simpleparsegrammar
    -

    - - - - - -
     
    -Classes
           
    -
    BaseParser -
    -
    -
    Parser -
    -
    -
    -

    - - - - - - - -
     
    -class Parser(BaseParser)
       EBNF-generated Parsers with results-handling

    -The Parser is a two-stage object:
    -        Passed an EBNF definition during initialisation,
    -        it compiles the definition into a tagging table
    -        (which in turn requires creating a tagging table
    -        for parsing the EBNF).

    -        You then call the parser's parse method to
    -        perform the actual parsing of your data, with the
    -        parser passing the results to your processor object
    -        and then back to you.
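
A minimal usage sketch of the two stages (the grammar and sample data are illustrative assumptions): compile a declaration once, then call parse repeatedly.

from simpleparse.parser import Parser

decl = r'''
csv   := field, (",", field)*
field := [a-zA-Z0-9 ]+
'''
parser = Parser(decl, 'csv')                         # stage one: build tag tables
success, children, next_pos = parser.parse('a,b,c')  # stage two: parse data
for tagname, start, stop, subtags in children:
    print(tagname, 'a,b,c'[start:stop])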
     
     Methods defined here:
    -
    __init__(self, declaration, root='root', prebuilts=(), definitionSources=[...])
    Initialise the parser, creating the tagging table for it

    -declaration -- simpleparse ebnf declaration of the language being parsed
    -root -- root production used for parsing if none explicitly specified
    -prebuilts -- sequence of (name,value) tuples with prebuilt tables, values
    -        can be either objectgenerator EventToken sub-classes or TextTools
    -        tables
    -definitionSources -- dictionaries of common constructs for use
    -        in building your grammar
    - -
    buildTagger(self, production=None, processor=None)
    Get a particular parsing table for a particular production
    - -
    -Methods inherited from BaseParser:
    -
    buildProcessor(self)
    Build default processor object for this parser class

    -The default implementation returns None.  The processor
    -can either implement the "method source" API (just provides
    -information about Callouts and the like), or the processor
    -API and the method-source API.  The processor API merely
    -requires that the object be callable, and have the signature:

    -        object( (success, children, nextPosition), buffer)

    -(Note: your object can treat the first item as a single tuple
    -if it likes).

    -See: simpleparse.processor module for details.
    - -
    parse(self, data, production=None, processor=None, start=0, stop=None)
    Parse data with production "production" of this parser

    -data -- data to be parsed, a Python string, for now
    -production -- optional string specifying a non-default production to use
    -        for parsing data
    -processor -- optional pointer to a Processor or MethodSource object for
    -        use in determining reporting format and/or post-processing the results
    -        of the parsing pass.  Can be None if neither is desired (default)
    -start -- starting index for the parsing, default 0
    -stop -- stopping index for the parsing, default len(data)
    - -
    resetBeforeParse(self)
     Called just before the parser's parse method starts working.

    -Allows you to set up special-purpose structures, such as stacks
    -or local storage values.  The base implementation does nothing.
    - -

\ No newline at end of file
diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.printers.html simpleparse-2.2.0/doc/pydoc/simpleparse.printers.html
--- simpleparse-2.1.0a1/doc/pydoc/simpleparse.printers.html 2006-02-19 01:05:13.000000000 +0000
+++ simpleparse-2.2.0/doc/pydoc/simpleparse.printers.html 1970-01-01 00:00:00.000000000 +0000
@@ -1,72 +0,0 @@
-Python: module simpleparse.printers
     
    - 
    simpleparse.printers
    index
    /home/mcfletch/pylive/simpleparse/printers.py
    -

    Utility to print Python code for a given generator object's element tokens

    -

    - - - - - -
     
    -Modules
           
    string
    -

    - - - - - -
     
    -Classes
           
    -
    _GeneratorFormatter -
    -

    - - - - - - - -
     
    -class _GeneratorFormatter
       Singleton Class to give a generator's element tokens as a source string

    -Call this as:
    -        printers.asGenerator( generator ) to get a Python source string
    -        that tries to recreate the generator as a set of objectgenerator
    -        element token objects (as seen in simpleparsegrammar).
     
     Methods defined here:
    -
    __call__(self, generator)
    - -
    reprObject(self, obj, depth=0, indent=' ')
    Return a recognisable version of an objectgenerator element token
    - -
    -Data and other attributes defined here:
    -
    HEAD = 'from simpleparse import generator\nfrom simplepar...return GENERATOR.buildParser( name, processor )\n\n'
    - -
    ITEM = 'GENERATOR.addDefinition(\n %(name)s,\n %(element)s,\n)\n'
    - -

    - - - - - -
     
    -Functions
           
    asObject = reprObject(self, obj, depth=0, indent=' ') method of _GeneratorFormatter instance
    Return a recognisable version of an objectgenerator element token
    -

    - - - - - -
     
    -Data
           asGenerator = <simpleparse.printers._GeneratorFormatter instance>
\ No newline at end of file
diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.processor.html simpleparse-2.2.0/doc/pydoc/simpleparse.processor.html
--- simpleparse-2.1.0a1/doc/pydoc/simpleparse.processor.html 2006-02-19 01:05:13.000000000 +0000
+++ simpleparse-2.2.0/doc/pydoc/simpleparse.processor.html 1970-01-01 00:00:00.000000000 +0000
@@ -1,84 +0,0 @@
-Python: module simpleparse.processor
     
    - 
    simpleparse.processor
    index
    /home/mcfletch/pylive/simpleparse/processor.py
    -

    Definitions of the MethodSource and Processor APIs

    -

    - - - - - -
     
    -Classes
           
    -
    MethodSource -
    -
    -
    Processor -
    -
    -
    -

    - - - - - - - -
     
    -class MethodSource
       Base class for MethodSource objects (including Processors and Parsers)
    -Most applications will use either Processor or Parser objects, rather
    -than directly using a MethodSource object.

    -The MethodSource is basically just a generic object whose attributes
    -are accessed during generation and/or post-processing of parse results.
    -The following special attribute forms are recognised:

    -        _m_productionname -- alters the method used in the TextTools
    -                engine for storing results.  If this is a callable object,
    -                then call the object with:
    -                        object( taglist,text,l,r,subtags )
    -                        
    -                If it is TextTools.AppendToTagobj, then append the result
    -                tuple to the associated object (_o_productionname).  This
    -                requires that _o_productionname have an "append" method,
    -                obviously.

    -                If it is the constant TextTools.AppendMatch, then append
    -                the string value which matched the production.
    -                
    -                If it is TextTools.AppendTagobj, then append the associated
    -                tagobject itself to the results tree.
    -                
    -        _o_productionname -- with AppendToTagobj, AppendTagobj and
    -                cases where there is no _m_productionname defined, this
    -                allows you to provide an explicit tagobject for reporting
    -                in the results tree/getting called with results.
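
A hedged sketch of a MethodSource-style object (the class name, grammar and sample input are illustrative assumptions): _m_word selects AppendToTagobj as the storage strategy for the 'word' production, so each result tuple is appended to _o_word instead of the normal result tree.

from simpleparse.parser import Parser
from simpleparse.stt.TextTools.TextTools import AppendToTagobj

class WordCollector:
    _m_word = AppendToTagobj    # engine-side storage strategy for 'word'
    _o_word = []                # tag object; must provide an append() method

decl = r'''
root := (word, ws)+
word := [a-zA-Z]+
<ws> := [ \t]*
'''
parser = Parser(decl, 'root')
source = WordCollector()
parser.parse('spam and eggs', processor=source)
print(source._o_word)           # tuples collected during the parse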
     
     

    - - - - - - - -
     
    -class Processor(MethodSource)
       Provides definition of a generic processing API

    -Basically, a Processor has a method __call__ which takes
    -two arguments, a value (which is either a 3-tuple or a 4-tuple
    -depending on whether a top-level production is being processed),
    -and a pointer to the buffer being parsed.
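
A minimal sketch of a Processor subclass (class name, grammar and sample input are illustrative assumptions): because the object is callable, parse returns whatever __call__ returns.

from simpleparse.parser import Parser
from simpleparse.processor import Processor

class WordCounter(Processor):
    def __call__(self, value, buffer):
        # Top-level call: value is (success, children, nextPosition).
        success, children, next_pos = value
        return len(children)

decl = r'''
root := (word, ws)+
word := [a-zA-Z]+
<ws> := [ \t]*
'''
parser = Parser(decl, 'root')
print(parser.parse('one two three', processor=WordCounter()))  # expected: 3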
     
     Methods defined here:
    -
    __call__(self, value, buffer)
    Process the results of a parsing run over buffer
    - -
    __repr__(self)
    Return a representation of the class
    - -

\ No newline at end of file
diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.setup.html simpleparse-2.2.0/doc/pydoc/simpleparse.setup.html
--- simpleparse-2.1.0a1/doc/pydoc/simpleparse.setup.html 2006-02-19 01:05:13.000000000 +0000
+++ simpleparse-2.2.0/doc/pydoc/simpleparse.setup.html 1970-01-01 00:00:00.000000000 +0000
@@ -1,194 +0,0 @@
-Python: module simpleparse.setup
     
    - 
    simpleparse.setup
    index
    /home/mcfletch/pylive/simpleparse/setup.py
    -

    Installs SimpleParse using distutils

    -Run:
    -        python setup.py install
    -to install the packages from the source archive.

    -

    - - - - - -
     
    -Modules
           
    os
    -
    re
    -
    string
    -
    sys
    -

    - - - - - -
     
    -Classes
           
    -
    install_data(Command) -
    -
    -
    smart_install_data -
    -
    -
    -

    - - - - - -
     
    -class smart_install_data(install_data)
        
    Method resolution order:
    -
    smart_install_data
    -
    install_data
    -
    Command
    -
    -
    -Methods defined here:
    -
    run(self)
    - -
    -Methods inherited from install_data:
    -
    finalize_options(self)
    - -
    get_inputs(self)
    - -
    get_outputs(self)
    - -
    initialize_options(self)
    - -
    -Data and other attributes inherited from install_data:
    -
    boolean_options = ['force']
    - -
    description = 'install data files'
    - -
    user_options = [('install-dir=', 'd', 'base directory for installing data files (default: installation base dir)'), ('root=', None, 'install everything relative to this alternate root directory'), ('force', 'f', 'force installation (overwrite existing files)')]
    - -
    -Methods inherited from Command:
    -
    __getattr__(self, attr)
    - -
    __init__(self, dist)
    Create and initialize a new Command object.  Most importantly,
    -invokes the 'initialize_options()' method, which is the real
    -initializer and depends on the actual command being
    -instantiated.
    - -
    announce(self, msg, level=1)
    If the current verbosity level is greater than or equal to
    -'level', print 'msg' to stdout.
    - -
    copy_file(self, infile, outfile, preserve_mode=1, preserve_times=1, link=None, level=1)
    Copy a file respecting verbose, dry-run and force flags.  (The
    -former two default to whatever is in the Distribution object, and
    -the latter defaults to false for commands that don't define it.)
    - -
    copy_tree(self, infile, outfile, preserve_mode=1, preserve_times=1, preserve_symlinks=0, level=1)
    Copy an entire directory tree respecting verbose, dry-run,
    -and force flags.
    - -
    debug_print(self, msg)
    Print 'msg' to stdout if the global DEBUG (taken from the
    -DISTUTILS_DEBUG environment variable) flag is true.
    - -
    dump_options(self, header=None, indent='')
    - -
    ensure_dirname(self, option)
    - -
    ensure_filename(self, option)
    Ensure that 'option' is the name of an existing file.
    - -
    ensure_finalized(self)
    - -
    ensure_string(self, option, default=None)
    Ensure that 'option' is a string; if not defined, set it to
    -'default'.
    - -
    ensure_string_list(self, option)
    Ensure that 'option' is a list of strings.  If 'option' is
    -currently a string, we split it either on /,\s*/ or /\s+/, so
    -"foo bar baz", "foo,bar,baz", and "foo,   bar baz" all become
    -["foo", "bar", "baz"].
    - -
    execute(self, func, args, msg=None, level=1)
    - -
    get_command_name(self)
    - -
    get_finalized_command(self, command, create=1)
    Wrapper around Distribution's 'get_command_obj()' method: find
    -(create if necessary and 'create' is true) the command object for
    -'command', call its 'ensure_finalized()' method, and return the
    -finalized command object.
    - -
    get_sub_commands(self)
    Determine the sub-commands that are relevant in the current
    -distribution (ie., that need to be run).  This is based on the
    -'sub_commands' class attribute: each tuple in that list may include
    -a method that we call to determine if the subcommand needs to be
    -run for the current distribution.  Return a list of command names.
    - -
    make_archive(self, base_name, format, root_dir=None, base_dir=None)
    - -
    make_file(self, infiles, outfile, func, args, exec_msg=None, skip_msg=None, level=1)
    Special case of 'execute()' for operations that process one or
    -more input files and generate one output file.  Works just like
    -'execute()', except the operation is skipped and a different
    -message printed if 'outfile' already exists and is newer than all
    -files listed in 'infiles'.  If the command defined 'self.force',
    -and it is true, then the command is unconditionally run -- does no
    -timestamp checks.
    - -
    mkpath(self, name, mode=511)
    - -
    move_file(self, src, dst, level=1)
    Move a file, respecting the dry-run flag.
    - -
    reinitialize_command(self, command, reinit_subcommands=0)
    # XXX rename to 'get_reinitialized_command()'? (should do the
    -# same in dist.py, if so)
    - -
    run_command(self, command)
    Run some other command: uses the 'run_command()' method of
    -Distribution, which creates and finalizes the command object if
    -necessary and then invokes its 'run()' method.
    - -
    set_undefined_options(self, src_cmd, *option_pairs)
    Set the values of any "undefined" options from corresponding
    -option values in some other command object.  "Undefined" here means
    -"is None", which is the convention used to indicate that an option
    -has not been changed between 'initialize_options()' and
    -'finalize_options()'.  Usually called from 'finalize_options()' for
    -options that depend on some other command rather than another
    -option of the same command.  'src_cmd' is the other command from
    -which option values will be taken (a command object will be created
    -for it if necessary); the remaining arguments are
    -'(src_option,dst_option)' tuples which mean "take the value of
    -'src_option' in the 'src_cmd' command object, and copy it to
    -'dst_option' in the current command object".
    - -
    spawn(self, cmd, search_path=1, level=1)
    Spawn an external command respecting dry-run flag.
    - -
    warn(self, msg)
    - -
    -Data and other attributes inherited from Command:
    -
    sub_commands = []
    - -

    - - - - - -
     
    -Functions
           
    isPackage(filename)
    -
    npFilesFor(dirname)
    Return all non-python-file filenames in dir
    -
    packagesFor(filename, basePackage='')
    Find all packages in filename
    -

    - - - - - -
     
    -Data
           EXEC_PREFIX = '/usr'
    -PREFIX = '/usr'
    -dataFiles = []
    -packages = {}
    -python_build = False
\ No newline at end of file
diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.setupstt.html simpleparse-2.2.0/doc/pydoc/simpleparse.setupstt.html
--- simpleparse-2.1.0a1/doc/pydoc/simpleparse.setupstt.html 2006-02-19 01:05:13.000000000 +0000
+++ simpleparse-2.2.0/doc/pydoc/simpleparse.setupstt.html 1970-01-01 00:00:00.000000000 +0000
@@ -1,2119 +0,0 @@
-Python: module simpleparse.setupstt
     
    - 
    simpleparse.setupstt (version 2.1.1)
    index
    /home/mcfletch/pylive/simpleparse/setupstt.pyc
    -

    Distutils Extensions needed for the mx Extensions.

    -Copyright (c) 1997-2000, Marc-Andre Lemburg; mailto:mal@lemburg.com
    -Copyright (c) 2000-2003, eGenix.com Software GmbH; mailto:info@egenix.com
    -See the documentation for further information on copyrights,
    -or contact the author. All Rights Reserved.

    -

    - - - - - -
     
    -Modules
           
    distutils
    -glob
    -
    os
    -re
    -
    string
    -sys
    -
    types
    -

    - - - - - -
     
    -Classes
           
    -
    Command -
    -
    -
    mx_build_unixlib -
    mx_uninstall -
    -
    -
    bdist_dumb(Command) -
    -
    -
    mx_bdist_zope -
    -
    -
    bdist_rpm(Command) -
    -
    -
    mx_bdist_rpm -
    -
    -
    build(Command) -
    -
    -
    mx_build -
    -
    -
    build_clib(Command) -
    -
    -
    mx_build_clib -
    -
    -
    build_py(Command) -
    -
    -
    mx_build_py -
    -
    -
    install(Command) -
    -
    -
    mx_install -
    -
    -
    install_data(Command) -
    -
    -
    mx_install_data -
    -
    -
    Distribution -
    -
    -
    mx_Distribution -
    -
    -
    Extension -
    -
    -
    mx_Extension -
    -
    -
    CompilerSupportMixin -
    -
    -
    mx_autoconf(CompilerSupportMixin, config) -
    mx_build_ext(CompilerSupportMixin, build_ext) -
    -
    -
    UnixLibrary -
    -

    - - - - - - - -
     
    -class CompilerSupportMixin
       Compiler support mixin which makes sure that the .compiler
    -attribute is properly setup.
     
     Methods defined here:
    -
    prepare_compiler(self)
    - -
    -Data and other attributes defined here:
    -
    prepared_compiler = 0
    - -

    - - - - - - - -
     
    -class UnixLibrary
       Container for library configuration data.
     
     Methods defined here:
    -
    __init__(self, libname, sourcetree, libfiles, configure=None, configure_options=None, make_options=None)
    - -
    get(self, option, alternative=None)
    - -
    -Data and other attributes defined here:
    -
    configure = 'configure'
    - -
    configure_options = None
    - -
    libfiles = None
    - -
    libname = ''
    - -
    make_options = None
    - -
    sourcetree = ''
    - -

    - - - - - - - -
     
    -class mx_Distribution(Distribution)
       Distribution class which knows about our distutils extensions.
     
     Methods defined here:
    -
    has_unixlibs(self)
    - -
    -Data and other attributes defined here:
    -
    display_option_names = ['help_commands', 'name', 'version', 'fullname', 'author', 'author_email', 'maintainer', 'maintainer_email', 'contact', 'contact_email', 'url', 'license', 'licence', 'description', 'long_description', 'platforms', 'classifiers', 'keywords', 'classifiers']
    - -
    display_options = [('help-commands', None, 'list all available commands'), ('name', None, 'print package name'), ('version', 'V', 'print package version'), ('fullname', None, 'print <package name>-<version>'), ('author', None, "print the author's name"), ('author-email', None, "print the author's email address"), ('maintainer', None, "print the maintainer's name"), ('maintainer-email', None, "print the maintainer's email address"), ('contact', None, "print the maintainer's name if known, else the author's"), ('contact-email', None, "print the maintainer's email address if known, else the author's"), ('url', None, 'print the URL for this package'), ('license', None, 'print the license of the package'), ('licence', None, 'alias for --license'), ('description', None, 'print the package description'), ('long-description', None, 'print the long package description'), ('platforms', None, 'print the list of platforms'), ('classifiers', None, 'print the list of classifiers'), ('keywords', None, 'print the list of keywords'), ('classifiers', None, 'print the list of classifiers (not yet supported)')]
    - -
    unixlibs = None
    - -
    -Methods inherited from Distribution:
    -
    __init__(self, attrs=None)
    Construct a new Distribution instance: initialize all the
    -attributes of a Distribution, and then use 'attrs' (a dictionary
    -mapping attribute names to values) to assign some of those
    -attributes their "real" values.  (Any attributes not mentioned in
    -'attrs' will be assigned to some null value: 0, None, an empty list
    -or dictionary, etc.)  Most importantly, initialize the
    -'command_obj' attribute to the empty dictionary; this will be
    -filled in with real command objects by 'parse_command_line()'.
    - -
    announce(self, msg, level=1)
    - -
    dump_option_dicts(self, header=None, commands=None, indent='')
    - -
    finalize_options(self)
    Set final values for all the options on the Distribution
    -instance, analogous to the .finalize_options() method of Command
    -objects.
    - -
    find_config_files(self)
    Find as many configuration files as should be processed for this
    -platform, and return a list of filenames in the order in which they
    -should be parsed.  The filenames returned are guaranteed to exist
    -(modulo nasty race conditions).

    -There are three possible config files: distutils.cfg in the
    -Distutils installation directory (ie. where the top-level
    -Distutils __inst__.py file lives), a file in the user's home
    -directory named .pydistutils.cfg on Unix and pydistutils.cfg
    -on Windows/Mac, and setup.cfg in the current directory.
    - -
    get_command_class(self, command)
    Return the class that implements the Distutils command named by
    -'command'.  First we check the 'cmdclass' dictionary; if the
    -command is mentioned there, we fetch the class object from the
    -dictionary and return it.  Otherwise we load the command module
    -("distutils.command." + command) and fetch the command class from
    -the module.  The loaded class is also stored in 'cmdclass'
    -to speed future calls to 'get_command_class()'.

    -Raises DistutilsModuleError if the expected module could not be
    -found, or if that module does not define the expected class.
    - -
    get_command_list(self)
    Get a list of (command, description) tuples.
    -The list is divided into "standard commands" (listed in
    -distutils.command.__all__) and "extra commands" (mentioned in
    -self.cmdclass, but not a standard command).  The descriptions come
    -from the command class attribute 'description'.
    - -
    get_command_obj(self, command, create=1)
    Return the command object for 'command'.  Normally this object
    -is cached on a previous call to 'get_command_obj()'; if no command
    -object for 'command' is in the cache, then we either create and
    -return it (if 'create' is true) or return None.
    - -
    get_command_packages(self)
    Return a list of packages from which commands are loaded.
    - -
    get_option_dict(self, command)
    Get the option dictionary for a given command.  If that
    -command's option dictionary hasn't been created yet, then create it
    -and return the new dictionary; otherwise, return the existing
    -option dictionary.
    - -
    handle_display_options(self, option_order)
    If there were any non-global "display-only" options
    -(--help-commands or the metadata display options) on the command
    -line, display the requested info and return true; else return
    -false.
    - -
    has_c_libraries(self)
    - -
    has_data_files(self)
    - -
    has_ext_modules(self)
    - -
    has_headers(self)
    - -
    has_modules(self)
    - -
    has_pure_modules(self)
    - -
    has_scripts(self)
    - -
    is_pure(self)
    - -
    parse_command_line(self)
    Parse the setup script's command line, taken from the
    -'script_args' instance attribute (which defaults to 'sys.argv[1:]'
    --- see 'setup()' in core.py).  This list is first processed for
    -"global options" -- options that set attributes of the Distribution
    -instance.  Then, it is alternately scanned for Distutils commands
    -and options for that command.  Each new command terminates the
    -options for the previous command.  The allowed options for a
    -command are determined by the 'user_options' attribute of the
    -command class -- thus, we have to be able to load command classes
    -in order to parse the command line.  Any error in that 'options'
    -attribute raises DistutilsGetoptError; any error on the
    -command-line raises DistutilsArgError.  If no Distutils commands
    -were found on the command line, raises DistutilsArgError.  Return
    -true if command-line was successfully parsed and we should carry
    -on with executing commands; false if no errors but we shouldn't
    -execute commands (currently, this only happens if user asks for
    -help).
    - -
    parse_config_files(self, filenames=None)
    - -
    print_command_list(self, commands, header, max_length)
    Print a subset of the list of all commands -- used by
    -'print_commands()'.
    - -
    print_commands(self)
    Print out a help message listing all available commands with a
    -description of each.  The list is divided into "standard commands"
    -(listed in distutils.command.__all__) and "extra commands"
    -(mentioned in self.cmdclass, but not a standard command).  The
    -descriptions come from the command class attribute
    -'description'.
    - -
    reinitialize_command(self, command, reinit_subcommands=0)
    Reinitializes a command to the state it was in when first
    -returned by 'get_command_obj()': ie., initialized but not yet
    -finalized.  This provides the opportunity to sneak option
    -values in programmatically, overriding or supplementing
    -user-supplied values from the config files and command line.
    -You'll have to re-finalize the command object (by calling
    -'finalize_options()' or 'ensure_finalized()') before using it for
    -real.

    -'command' should be a command name (string) or command object.  If
    -'reinit_subcommands' is true, also reinitializes the command's
    -sub-commands, as declared by the 'sub_commands' class attribute (if
    -it has one).  See the "install" command for an example.  Only
    -reinitializes the sub-commands that actually matter, ie. those
    -whose test predicates return true.

    -Returns the reinitialized command object.
    - -
    run_command(self, command)
    Do whatever it takes to run a command (including nothing at all,
    -if the command has already been run).  Specifically: if we have
    -already created and run the command named by 'command', return
    -silently without doing anything.  If the command named by 'command'
    -doesn't even have a command object yet, create one.  Then invoke
    -'run()' on that command object (or an existing one).
    - -
    run_commands(self)
    Run each command that was seen on the setup script command line.
    -Uses the list of commands found and cache of command objects
    -created by 'get_command_obj()'.
    - -
    -Data and other attributes inherited from Distribution:
    -
    global_options = [('verbose', 'v', 'run verbosely (default)', 1), ('quiet', 'q', 'run quietly (turns verbosity off)'), ('dry-run', 'n', "don't actually do anything"), ('help', 'h', 'show detailed help message')]
    - -
    negative_opt = {'quiet': 'verbose'}
    - -

    - - - - - - - -
     
    -class mx_Extension(Extension)
       Extension class which allows specifying whether the extension
    -is required to build or optional.
     
     Methods defined here:
    -
    __init__(self, *args, **kws)
    - -
    -Data and other attributes defined here:
    -
    data_files = ()
    - -
    lib_types = ('shared', 'static')
    - -
    needed_includes = ()
    - -
    needed_libraries = ()
    - -
    optional_libraries = ()
    - -
    packages = ()
    - -
    required = 1
    - -
    successfully_built = 0
    - -

    - - - - - - - -
     
    -class mx_autoconf(CompilerSupportMixin, config)
       Auto-configuration class which adds some extra configuration
    -settings to the packages.
     
     
    Method resolution order:
    -
    mx_autoconf
    -
    CompilerSupportMixin
    -
    config
    -
    Command
    -
    -
    -Methods defined here:
    -
    check_bad_staticforward(self)
    Check whether the compiler does not support forward-declaring
    -static arrays.

    -For documentation of the other arguments see the base
    -class' .try_link().
    - -
    check_compiler(self, sourcecode, headers=None, include_dirs=None, libraries=None, library_dirs=None)
    Check whether sourcecode compiles and links with the current
    -compiler and link environment.

    -For documentation of the other arguments see the base
    -class' .try_link().
    - -
    check_function(self, function, headers=None, include_dirs=None, libraries=None, library_dirs=None, prototype=0, call=0)
    Check whether function is available in the given
    -compile and link environment.

    -If prototype is true, a function prototype is included in
    -the test. If call is true, a function call is generated
    -(rather than just a reference of the function symbol).

    -For documentation of the other arguments see the base
    -class' .try_link().
    - -
    check_library(self, library, library_dirs=None, headers=None, include_dirs=None, other_libraries=[])
    Check whether we can link against the given library.

    -For documentation of the other arguments see the base
    -class' .try_link().
    - -
    finalize_options(self)
    - -
    find_include_file(self, filename, paths, pattern=None)
    Find an include file of the given name.

    -The search path is determined by the paths parameter, the
    -compiler's .include_dirs attribute and the STDINCLPATH and
    -FINDINCLPATH globals. The search is done in this order.
    - -
    find_library_file(self, libname, paths, pattern=None, lib_types=('shared', 'static'))
    Find a library of the given name.

    -The search path is determined by the paths parameter, the
    -compiler's .library_dirs attribute and the STDLIBPATH and
    -FINDLIBPATH globals. The search is done in this order.

    -Shared libraries are preferred over static ones if both
    -types are given in lib_types.
    - -
    initialize_options(self)
    - -
    prepare_compiler(self)
    - -
    run(self)
    - -
    -Data and other attributes defined here:
    -
    api_checks = (('strftime', ['time.h']), ('strptime', ['time.h']), ('timegm', ['time.h']))
    - -
    description = 'auto-configuration build step (for internal use only)'
    - -
    enable_debugging = 0
    - -
    user_options = [('compiler=', None, 'specify the compiler type'), ('cc=', None, 'specify the compiler executable'), ('include-dirs=', 'I', 'list of directories to search for header files'), ('define=', 'D', 'C preprocessor macros to define'), ('undef=', 'U', 'C preprocessor macros to undefine'), ('libraries=', 'l', 'external C libraries to link with'), ('library-dirs=', 'L', 'directories to search for external C libraries'), ('noisy', None, 'show every action (compile, link, run, ...) taken'), ('dump-source', None, 'dump generated source files before attempting to compile them'), ('enable-debugging', None, 'compile with debugging support')]
    - -
    -Data and other attributes inherited from CompilerSupportMixin:
    -
    prepared_compiler = 0
    - -
    -Methods inherited from config:
    -
    check_func(self, func, headers=None, include_dirs=None, libraries=None, library_dirs=None, decl=0, call=0)
    Determine if function 'func' is available by constructing a
    -source file that refers to 'func', and compiles and links it.
    -If everything succeeds, returns true; otherwise returns false.

    -The constructed source file starts out by including the header
    -files listed in 'headers'.  If 'decl' is true, it then declares
    -'func' (as "int func()"); you probably shouldn't supply 'headers'
    -and set 'decl' true in the same call, or you might get errors about
    -a conflicting declaration for 'func'.  Finally, the constructed
    -'main()' function either references 'func' or (if 'call' is true)
    -calls it.  'libraries' and 'library_dirs' are used when
    -linking.
    - -
    check_header(self, header, include_dirs=None, library_dirs=None, lang='c')
    Determine if the system header file named by 'header'
    -exists and can be found by the preprocessor; return true if so,
    -false otherwise.
    - -
    check_lib(self, library, library_dirs=None, headers=None, include_dirs=None, other_libraries=[])
    Determine if 'library' is available to be linked against,
    -without actually checking that any particular symbols are provided
    -by it.  'headers' will be used in constructing the source file to
    -be compiled, but the only effect of this is to check if all the
    -header files listed are available.  Any libraries listed in
    -'other_libraries' will be included in the link, in case 'library'
    -has symbols that depend on other libraries.
    - -
    search_cpp(self, pattern, body=None, headers=None, include_dirs=None, lang='c')
    Construct a source file (just like 'try_cpp()'), run it through
    -the preprocessor, and return true if any line of the output matches
    -'pattern'.  'pattern' should either be a compiled regex object or a
    -string containing a regex.  If both 'body' and 'headers' are None,
    -preprocesses an empty file -- which can be useful to determine the
    -symbols the preprocessor and compiler set by default.
    - -
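    For illustration, search_cpp() as documented above can be used to ask the
    preprocessor a question; the macro, marker text and header below are
    assumptions invented for the example:

        # Sketch only: does this toolchain define __GLIBC__?
        uses_glibc = self.search_cpp(
            pattern=r'glibc_marker',
            body='#ifdef __GLIBC__\nglibc_marker\n#endif\n',
            headers=['features.h'])
    - -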
    try_compile(self, body, headers=None, include_dirs=None, lang='c')
    Try to compile a source file built from 'body' and 'headers'.
    -Return true on success, false otherwise.
    - -
    try_cpp(self, body=None, headers=None, include_dirs=None, lang='c')
    Construct a source file from 'body' (a string containing lines
    -of C/C++ code) and 'headers' (a list of header files to include)
    -and run it through the preprocessor.  Return true if the
    -preprocessor succeeded, false if there were any errors.
    -('body' probably isn't of much use, but what the heck.)
    - -
    try_link(self, body, headers=None, include_dirs=None, libraries=None, library_dirs=None, lang='c')
    Try to compile and link a source file, built from 'body' and
    -'headers', to executable form.  Return true on success, false
    -otherwise.
    - -
    try_run(self, body, headers=None, include_dirs=None, libraries=None, library_dirs=None, lang='c')
    Try to compile, link to an executable, and run a program
    -built from 'body' and 'headers'.  Return true on success, false
    -otherwise.
    - -
    -Methods inherited from Command:
    -
    __getattr__(self, attr)
    - -
    __init__(self, dist)
    Create and initialize a new Command object.  Most importantly,
    -invokes the 'initialize_options()' method, which is the real
    -initializer and depends on the actual command being
    -instantiated.
    - -
    announce(self, msg, level=1)
    If the current verbosity level is greater than or equal to
    -'level', print 'msg' to stdout.
    - -
    copy_file(self, infile, outfile, preserve_mode=1, preserve_times=1, link=None, level=1)
    Copy a file respecting verbose, dry-run and force flags.  (The
    -former two default to whatever is in the Distribution object, and
    -the latter defaults to false for commands that don't define it.)
    - -
    copy_tree(self, infile, outfile, preserve_mode=1, preserve_times=1, preserve_symlinks=0, level=1)
    Copy an entire directory tree respecting verbose, dry-run,
    -and force flags.
    - -
    debug_print(self, msg)
    Print 'msg' to stdout if the global DEBUG (taken from the
    -DISTUTILS_DEBUG environment variable) flag is true.
    - -
    dump_options(self, header=None, indent='')
    - -
    ensure_dirname(self, option)
    - -
    ensure_filename(self, option)
    Ensure that 'option' is the name of an existing file.
    - -
    ensure_finalized(self)
    - -
    ensure_string(self, option, default=None)
    Ensure that 'option' is a string; if not defined, set it to
    -'default'.
    - -
    ensure_string_list(self, option)
    Ensure that 'option' is a list of strings.  If 'option' is
    -currently a string, we split it either on /,\s*/ or /\s+/, so
    -"foo bar baz", "foo,bar,baz", and "foo,   bar baz" all become
    -["foo", "bar", "baz"].
    - -
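    The splitting rule described above can be reproduced with a short
    standalone snippet; this is an illustration of the rule, not the actual
    implementation:

        import re

        for value in ('foo bar baz', 'foo,bar,baz', 'foo,   bar baz'):
            # Split on /,\s*/ or /\s+/, as the docstring describes.
            print(re.split(r',\s*|\s+', value))   # -> ['foo', 'bar', 'baz']
    - -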
    execute(self, func, args, msg=None, level=1)
    - -
    get_command_name(self)
    - -
    get_finalized_command(self, command, create=1)
    Wrapper around Distribution's 'get_command_obj()' method: find
    -(create if necessary and 'create' is true) the command object for
    -'command', call its 'ensure_finalized()' method, and return the
    -finalized command object.
    - -
    get_sub_commands(self)
    Determine the sub-commands that are relevant in the current
    -distribution (ie., that need to be run).  This is based on the
    -'sub_commands' class attribute: each tuple in that list may include
    -a method that we call to determine if the subcommand needs to be
    -run for the current distribution.  Return a list of command names.
    - -
    make_archive(self, base_name, format, root_dir=None, base_dir=None)
    - -
    make_file(self, infiles, outfile, func, args, exec_msg=None, skip_msg=None, level=1)
    Special case of 'execute()' for operations that process one or
    -more input files and generate one output file.  Works just like
    -'execute()', except the operation is skipped and a different
    -message printed if 'outfile' already exists and is newer than all
    -files listed in 'infiles'.  If the command defined 'self.force',
    -and it is true, then the command is unconditionally run -- does no
    -timestamp checks.
    - -
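    A hedged sketch of how make_file() might be used; the file names and the
    external 'yacc' invocation are invented for the example:

        # Sketch only: regenerate parser.c from parser.g when it is stale.
        self.make_file(
            ['parser.g'], 'parser.c',
            self.spawn, (['yacc', '-o', 'parser.c', 'parser.g'],),
            exec_msg='generating parser.c',
            skip_msg='parser.c is up to date')
    - -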
    mkpath(self, name, mode=511)
    - -
    move_file(self, src, dst, level=1)
    Move a file respecting the dry-run flag.
    - -
    reinitialize_command(self, command, reinit_subcommands=0)
    # XXX rename to 'get_reinitialized_command()'? (should do the
    -# same in dist.py, if so)
    - -
    run_command(self, command)
    Run some other command: uses the 'run_command()' method of
    -Distribution, which creates and finalizes the command object if
    -necessary and then invokes its 'run()' method.
    - -
    set_undefined_options(self, src_cmd, *option_pairs)
    Set the values of any "undefined" options from corresponding
    -option values in some other command object.  "Undefined" here means
    -"is None", which is the convention used to indicate that an option
    -has not been changed between 'initialize_options()' and
    -'finalize_options()'.  Usually called from 'finalize_options()' for
    -options that depend on some other command rather than another
    -option of the same command.  'src_cmd' is the other command from
    -which option values will be taken (a command object will be created
    -for it if necessary); the remaining arguments are
    -'(src_option,dst_option)' tuples which mean "take the value of
    -'src_option' in the 'src_cmd' command object, and copy it to
    -'dst_option' in the current command object".
    - -
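    The usual pattern, shown as a short sketch (the build_lib/force pair is
    just a common choice used for illustration):

        # Typically called from finalize_options() of a dependent command.
        def finalize_options(self):
            self.set_undefined_options('build',
                                       ('build_lib', 'build_lib'),
                                       ('force', 'force'))
    - -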
    spawn(self, cmd, search_path=1, level=1)
    Spawn an external command respecting dry-run flag.
    - -
    warn(self, msg)
    - -
    -Data and other attributes inherited from Command:
    -
    sub_commands = []
    - -

    - - - - - - - -
     
    -class mx_bdist_rpm(bdist_rpm)
       bdist_rpm command which allows passing in distutils
    -options.
     
     
    Method resolution order:
    -
    mx_bdist_rpm
    -
    bdist_rpm
    -
    Command
    -
    -
    -Methods defined here:
    -
    finalize_options(self)
    - -
    -Data and other attributes defined here:
    -
    distutils_build_options = None
    - -
    distutils_install_options = None
    - -
    user_options = [('bdist-base=', None, 'base directory for creating built distributions'), ('rpm-base=', None, 'base directory for creating RPMs (defaults to "rpm" under --bdist-base; must be specified for RPM 2)'), ('dist-dir=', 'd', 'directory to put final RPM files in (and .spec files if --spec-only)'), ('python=', None, 'path to Python interpreter to hard-code in the .spec file (default: "python")'), ('fix-python', None, 'hard-code the exact path to the current Python interpreter in the .spec file'), ('spec-only', None, 'only regenerate spec file'), ('source-only', None, 'only generate source RPM'), ('binary-only', None, 'only generate binary RPM'), ('use-bzip2', None, 'use bzip2 instead of gzip to create source distribution'), ('distribution-name=', None, 'name of the (Linux) distribution to which this R...lies (*not* the name of the module distribution!)'), ('group=', None, 'package classification [default: "Development/Libraries"]'), ('release=', None, 'RPM release number'), ('serial=', None, 'RPM serial number'), ('vendor=', None, 'RPM "vendor" (eg. "Joe Blow <joe@example.com>") [default: maintainer or author from setup script]'), ('packager=', None, 'RPM packager (eg. "Jane Doe <jane@example.net>")[default: vendor]'), ('doc-files=', None, 'list of documentation files (space or comma-separated)'), ('changelog=', None, 'RPM changelog'), ('icon=', None, 'name of icon file'), ('provides=', None, 'capabilities provided by this package'), ('requires=', None, 'capabilities required by this package'), ...]
    - -
    -Methods inherited from bdist_rpm:
    -
    finalize_package_data(self)
    - -
    initialize_options(self)
    - -
    run(self)
    - -
    -Data and other attributes inherited from bdist_rpm:
    -
    boolean_options = ['keep-temp', 'use-rpm-opt-flags', 'rpm3-mode', 'no-autoreq']
    - -
    description = 'create an RPM distribution'
    - -
    negative_opt = {'no-keep-temp': 'keep-temp', 'no-rpm-opt-flags': 'use-rpm-opt-flags', 'rpm2-mode': 'rpm3-mode'}
    - -
    -Methods inherited from Command:
    -
    __getattr__(self, attr)
    - -
    __init__(self, dist)
    Create and initialize a new Command object.  Most importantly,
    -invokes the 'initialize_options()' method, which is the real
    -initializer and depends on the actual command being
    -instantiated.
    - -
    announce(self, msg, level=1)
    If the current verbosity level is greater than or equal to
    -'level', print 'msg' to stdout.
    - -
    copy_file(self, infile, outfile, preserve_mode=1, preserve_times=1, link=None, level=1)
    Copy a file respecting verbose, dry-run and force flags.  (The
    -former two default to whatever is in the Distribution object, and
    -the latter defaults to false for commands that don't define it.)
    - -
    copy_tree(self, infile, outfile, preserve_mode=1, preserve_times=1, preserve_symlinks=0, level=1)
    Copy an entire directory tree respecting verbose, dry-run,
    -and force flags.
    - -
    debug_print(self, msg)
    Print 'msg' to stdout if the global DEBUG (taken from the
    -DISTUTILS_DEBUG environment variable) flag is true.
    - -
    dump_options(self, header=None, indent='')
    - -
    ensure_dirname(self, option)
    - -
    ensure_filename(self, option)
    Ensure that 'option' is the name of an existing file.
    - -
    ensure_finalized(self)
    - -
    ensure_string(self, option, default=None)
    Ensure that 'option' is a string; if not defined, set it to
    -'default'.
    - -
    ensure_string_list(self, option)
    Ensure that 'option' is a list of strings.  If 'option' is
    -currently a string, we split it either on /,\s*/ or /\s+/, so
    -"foo bar baz", "foo,bar,baz", and "foo,   bar baz" all become
    -["foo", "bar", "baz"].
    - -
    execute(self, func, args, msg=None, level=1)
    - -
    get_command_name(self)
    - -
    get_finalized_command(self, command, create=1)
    Wrapper around Distribution's 'get_command_obj()' method: find
    -(create if necessary and 'create' is true) the command object for
    -'command', call its 'ensure_finalized()' method, and return the
    -finalized command object.
    - -
    get_sub_commands(self)
    Determine the sub-commands that are relevant in the current
    -distribution (ie., that need to be run).  This is based on the
    -'sub_commands' class attribute: each tuple in that list may include
    -a method that we call to determine if the subcommand needs to be
    -run for the current distribution.  Return a list of command names.
    - -
    make_archive(self, base_name, format, root_dir=None, base_dir=None)
    - -
    make_file(self, infiles, outfile, func, args, exec_msg=None, skip_msg=None, level=1)
    Special case of 'execute()' for operations that process one or
    -more input files and generate one output file.  Works just like
    -'execute()', except the operation is skipped and a different
    -message printed if 'outfile' already exists and is newer than all
    -files listed in 'infiles'.  If the command defined 'self.force',
    -and it is true, then the command is unconditionally run -- does no
    -timestamp checks.
    - -
    mkpath(self, name, mode=511)
    - -
    move_file(self, src, dst, level=1)
    Move a file respecting the dry-run flag.
    - -
    reinitialize_command(self, command, reinit_subcommands=0)
    # XXX rename to 'get_reinitialized_command()'? (should do the
    -# same in dist.py, if so)
    - -
    run_command(self, command)
    Run some other command: uses the 'run_command()' method of
    -Distribution, which creates and finalizes the command object if
    -necessary and then invokes its 'run()' method.
    - -
    set_undefined_options(self, src_cmd, *option_pairs)
    Set the values of any "undefined" options from corresponding
    -option values in some other command object.  "Undefined" here means
    -"is None", which is the convention used to indicate that an option
    -has not been changed between 'initialize_options()' and
    -'finalize_options()'.  Usually called from 'finalize_options()' for
    -options that depend on some other command rather than another
    -option of the same command.  'src_cmd' is the other command from
    -which option values will be taken (a command object will be created
    -for it if necessary); the remaining arguments are
    -'(src_option,dst_option)' tuples which mean "take the value of
    -'src_option' in the 'src_cmd' command object, and copy it to
    -'dst_option' in the current command object".
    - -
    spawn(self, cmd, search_path=1, level=1)
    Spawn an external command respecting dry-run flag.
    - -
    warn(self, msg)
    - -
    -Data and other attributes inherited from Command:
    -
    sub_commands = []
    - -

    - - - - - - - -
     
    -class mx_bdist_zope(bdist_dumb)
       Build binary Zope product distribution.
     
     
    Method resolution order:
    -
    mx_bdist_zope
    -
    bdist_dumb
    -
    Command
    -
    -
    -Methods defined here:
    -
    finalize_options(self)
    - -
    reinitialize_command(self, command, reinit_subcommands=0)
    - -
    -Methods inherited from bdist_dumb:
    -
    initialize_options(self)
    - -
    run(self)
    - -
    -Data and other attributes inherited from bdist_dumb:
    -
    boolean_options = ['keep-temp', 'skip-build', 'relative']
    - -
    default_format = {'nt': 'zip', 'os2': 'zip', 'posix': 'gztar'}
    - -
    description = 'create a "dumb" built distribution'
    - -
    user_options = [('bdist-dir=', 'd', 'temporary directory for creating the distribution'), ('plat-name=', 'p', 'platform name to embed in generated filenames (default: linux-x86_64)'), ('format=', 'f', 'archive format to create (tar, ztar, gztar, zip)'), ('keep-temp', 'k', 'keep the pseudo-installation tree around after creating the distribution archive'), ('dist-dir=', 'd', 'directory to put final built distributions in'), ('skip-build', None, 'skip rebuilding everything (for testing/debugging)'), ('relative', None, 'build the archive using relative paths(default: false)')]
    - -
    -Methods inherited from Command:
    -
    __getattr__(self, attr)
    - -
    __init__(self, dist)
    Create and initialize a new Command object.  Most importantly,
    -invokes the 'initialize_options()' method, which is the real
    -initializer and depends on the actual command being
    -instantiated.
    - -
    announce(self, msg, level=1)
    If the current verbosity level is greater than or equal to
    -'level', print 'msg' to stdout.
    - -
    copy_file(self, infile, outfile, preserve_mode=1, preserve_times=1, link=None, level=1)
    Copy a file respecting verbose, dry-run and force flags.  (The
    -former two default to whatever is in the Distribution object, and
    -the latter defaults to false for commands that don't define it.)
    - -
    copy_tree(self, infile, outfile, preserve_mode=1, preserve_times=1, preserve_symlinks=0, level=1)
    Copy an entire directory tree respecting verbose, dry-run,
    -and force flags.
    - -
    debug_print(self, msg)
    Print 'msg' to stdout if the global DEBUG (taken from the
    -DISTUTILS_DEBUG environment variable) flag is true.
    - -
    dump_options(self, header=None, indent='')
    - -
    ensure_dirname(self, option)
    - -
    ensure_filename(self, option)
    Ensure that 'option' is the name of an existing file.
    - -
    ensure_finalized(self)
    - -
    ensure_string(self, option, default=None)
    Ensure that 'option' is a string; if not defined, set it to
    -'default'.
    - -
    ensure_string_list(self, option)
    Ensure that 'option' is a list of strings.  If 'option' is
    -currently a string, we split it either on /,\s*/ or /\s+/, so
    -"foo bar baz", "foo,bar,baz", and "foo,   bar baz" all become
    -["foo", "bar", "baz"].
    - -
    execute(self, func, args, msg=None, level=1)
    - -
    get_command_name(self)
    - -
    get_finalized_command(self, command, create=1)
    Wrapper around Distribution's 'get_command_obj()' method: find
    -(create if necessary and 'create' is true) the command object for
    -'command', call its 'ensure_finalized()' method, and return the
    -finalized command object.
    - -
    get_sub_commands(self)
    Determine the sub-commands that are relevant in the current
    -distribution (ie., that need to be run).  This is based on the
    -'sub_commands' class attribute: each tuple in that list may include
    -a method that we call to determine if the subcommand needs to be
    -run for the current distribution.  Return a list of command names.
    - -
    make_archive(self, base_name, format, root_dir=None, base_dir=None)
    - -
    make_file(self, infiles, outfile, func, args, exec_msg=None, skip_msg=None, level=1)
    Special case of 'execute()' for operations that process one or
    -more input files and generate one output file.  Works just like
    -'execute()', except the operation is skipped and a different
    -message printed if 'outfile' already exists and is newer than all
    -files listed in 'infiles'.  If the command defined 'self.force',
    -and it is true, then the command is unconditionally run -- does no
    -timestamp checks.
    - -
    mkpath(self, name, mode=511)
    - -
    move_file(self, src, dst, level=1)
    Move a file respecting the dry-run flag.
    - -
    run_command(self, command)
    Run some other command: uses the 'run_command()' method of
    -Distribution, which creates and finalizes the command object if
    -necessary and then invokes its 'run()' method.
    - -
    set_undefined_options(self, src_cmd, *option_pairs)
    Set the values of any "undefined" options from corresponding
    -option values in some other command object.  "Undefined" here means
    -"is None", which is the convention used to indicate that an option
    -has not been changed between 'initialize_options()' and
    -'finalize_options()'.  Usually called from 'finalize_options()' for
    -options that depend on some other command rather than another
    -option of the same command.  'src_cmd' is the other command from
    -which option values will be taken (a command object will be created
    -for it if necessary); the remaining arguments are
    -'(src_option,dst_option)' tuples which mean "take the value of
    -'src_option' in the 'src_cmd' command object, and copy it to
    -'dst_option' in the current command object".
    - -
    spawn(self, cmd, search_path=1, level=1)
    Spawn an external command respecting dry-run flag.
    - -
    warn(self, msg)
    - -
    -Data and other attributes inherited from Command:
    -
    sub_commands = []
    - -

    - - - - - - - -
     
    -class mx_build(build)
       build command which knows about our distutils extensions.
     
     
    Method resolution order:
    -
    mx_build
    -
    build
    -
    Command
    -
    -
    -Methods defined here:
    -
    has_unixlibs(self)
    - -
    -Data and other attributes defined here:
    -
    sub_commands = [('build_clib', <unbound method build.has_c_libraries>), ('build_unixlib', <function has_unixlibs>), ('mx_autoconf', <unbound method build.has_ext_modules>), ('build_ext', <unbound method build.has_ext_modules>), ('build_py', <unbound method build.has_pure_modules>), ('build_scripts', <unbound method build.has_scripts>)]
    - -
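    The sub_commands list above pairs command names with predicate functions.
    As a hedged sketch, a build subclass could splice in an extra step the same
    way; the 'build_docs' command and its predicate are hypothetical and would
    still have to be registered with the Distribution:

        import os
        from distutils.command.build import build

        class my_build(build):
            # Hypothetical extra build step appended to the standard chain.
            def has_docs(self):
                return os.path.isdir('Doc')
            sub_commands = build.sub_commands + [('build_docs', has_docs)]
    - -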
    -Methods inherited from build:
    -
    finalize_options(self)
    - -
    has_c_libraries(self)
    - -
    has_ext_modules(self)
    - -
    has_pure_modules(self)
    - -
    has_scripts(self)
    - -
    initialize_options(self)
    - -
    run(self)
    - -
    -Data and other attributes inherited from build:
    -
    boolean_options = ['debug', 'force']
    - -
    description = 'build everything needed to install'
    - -
    help_options = [('help-compiler', None, 'list available compilers', <function show_compilers>)]
    - -
    user_options = [('build-base=', 'b', 'base directory for build library'), ('build-purelib=', None, 'build directory for platform-neutral distributions'), ('build-platlib=', None, 'build directory for platform-specific distributions'), ('build-lib=', None, 'build directory for all distribution (defaults to either build-purelib or build-platlib'), ('build-scripts=', None, 'build directory for scripts'), ('build-temp=', 't', 'temporary build directory'), ('compiler=', 'c', 'specify the compiler type'), ('debug', 'g', 'compile extensions and libraries with debugging information'), ('force', 'f', 'forcibly build everything (ignore file timestamps)'), ('executable=', 'e', 'specify final destination interpreter path (build.py)')]
    - -
    -Methods inherited from Command:
    -
    __getattr__(self, attr)
    - -
    __init__(self, dist)
    Create and initialize a new Command object.  Most importantly,
    -invokes the 'initialize_options()' method, which is the real
    -initializer and depends on the actual command being
    -instantiated.
    - -
    announce(self, msg, level=1)
    If the current verbosity level is greater than or equal to
    -'level', print 'msg' to stdout.
    - -
    copy_file(self, infile, outfile, preserve_mode=1, preserve_times=1, link=None, level=1)
    Copy a file respecting verbose, dry-run and force flags.  (The
    -former two default to whatever is in the Distribution object, and
    -the latter defaults to false for commands that don't define it.)
    - -
    copy_tree(self, infile, outfile, preserve_mode=1, preserve_times=1, preserve_symlinks=0, level=1)
    Copy an entire directory tree respecting verbose, dry-run,
    -and force flags.
    - -
    debug_print(self, msg)
    Print 'msg' to stdout if the global DEBUG (taken from the
    -DISTUTILS_DEBUG environment variable) flag is true.
    - -
    dump_options(self, header=None, indent='')
    - -
    ensure_dirname(self, option)
    - -
    ensure_filename(self, option)
    Ensure that 'option' is the name of an existing file.
    - -
    ensure_finalized(self)
    - -
    ensure_string(self, option, default=None)
    Ensure that 'option' is a string; if not defined, set it to
    -'default'.
    - -
    ensure_string_list(self, option)
    Ensure that 'option' is a list of strings.  If 'option' is
    -currently a string, we split it either on /,\s*/ or /\s+/, so
    -"foo bar baz", "foo,bar,baz", and "foo,   bar baz" all become
    -["foo", "bar", "baz"].
    - -
    execute(self, func, args, msg=None, level=1)
    - -
    get_command_name(self)
    - -
    get_finalized_command(self, command, create=1)
    Wrapper around Distribution's 'get_command_obj()' method: find
    -(create if necessary and 'create' is true) the command object for
    -'command', call its 'ensure_finalized()' method, and return the
    -finalized command object.
    - -
    get_sub_commands(self)
    Determine the sub-commands that are relevant in the current
    -distribution (ie., that need to be run).  This is based on the
    -'sub_commands' class attribute: each tuple in that list may include
    -a method that we call to determine if the subcommand needs to be
    -run for the current distribution.  Return a list of command names.
    - -
    make_archive(self, base_name, format, root_dir=None, base_dir=None)
    - -
    make_file(self, infiles, outfile, func, args, exec_msg=None, skip_msg=None, level=1)
    Special case of 'execute()' for operations that process one or
    -more input files and generate one output file.  Works just like
    -'execute()', except the operation is skipped and a different
    -message printed if 'outfile' already exists and is newer than all
    -files listed in 'infiles'.  If the command defined 'self.force',
    -and it is true, then the command is unconditionally run -- does no
    -timestamp checks.
    - -
    mkpath(self, name, mode=511)
    - -
    move_file(self, src, dst, level=1)
    Move a file respecting the dry-run flag.
    - -
    reinitialize_command(self, command, reinit_subcommands=0)
    # XXX rename to 'get_reinitialized_command()'? (should do the
    -# same in dist.py, if so)
    - -
    run_command(self, command)
    Run some other command: uses the 'run_command()' method of
    -Distribution, which creates and finalizes the command object if
    -necessary and then invokes its 'run()' method.
    - -
    set_undefined_options(self, src_cmd, *option_pairs)
    Set the values of any "undefined" options from corresponding
    -option values in some other command object.  "Undefined" here means
    -"is None", which is the convention used to indicate that an option
    -has not been changed between 'initialize_options()' and
    -'finalize_options()'.  Usually called from 'finalize_options()' for
    -options that depend on some other command rather than another
    -option of the same command.  'src_cmd' is the other command from
    -which option values will be taken (a command object will be created
    -for it if necessary); the remaining arguments are
    -'(src_option,dst_option)' tuples which mean "take the value of
    -'src_option' in the 'src_cmd' command object, and copy it to
    -'dst_option' in the current command object".
    - -
    spawn(self, cmd, search_path=1, level=1)
    Spawn an external command respecting dry-run flag.
    - -
    warn(self, msg)
    - -

    - - - - - - - -
     
    -class mx_build_clib(build_clib)
       build_clib command which builds the libs using
    -separate temp dirs
     
     
    Method resolution order:
    -
    mx_build_clib
    -
    build_clib
    -
    Command
    -
    -
    -Methods defined here:
    -
    build_libraries(self, libraries)
    - -
    build_library(self, lib_name, build_info)
    - -
    -Methods inherited from build_clib:
    -
    check_library_list(self, libraries)
    Ensure that the list of libraries (presumably provided as a
    -command option 'libraries') is valid, i.e. it is a list of
    -2-tuples, where the tuples are (library_name, build_info_dict).
    -Raise DistutilsSetupError if the structure is invalid anywhere;
    -just returns otherwise.
    - -
    finalize_options(self)
    - -
    get_library_names(self)
    - -
    get_source_files(self)
    - -
    initialize_options(self)
    - -
    run(self)
    - -
    -Data and other attributes inherited from build_clib:
    -
    boolean_options = ['debug', 'force']
    - -
    description = 'build C/C++ libraries used by Python extensions'
    - -
    help_options = [('help-compiler', None, 'list available compilers', <function show_compilers>)]
    - -
    user_options = [('build-clib', 'b', 'directory to build C/C++ libraries to'), ('build-temp', 't', 'directory to put temporary build by-products'), ('debug', 'g', 'compile with debugging information'), ('force', 'f', 'forcibly build everything (ignore file timestamps)'), ('compiler=', 'c', 'specify the compiler type')]
    - -
    -Methods inherited from Command:
    -
    __getattr__(self, attr)
    - -
    __init__(self, dist)
    Create and initialize a new Command object.  Most importantly,
    -invokes the 'initialize_options()' method, which is the real
    -initializer and depends on the actual command being
    -instantiated.
    - -
    announce(self, msg, level=1)
    If the current verbosity level is greater than or equal to
    -'level', print 'msg' to stdout.
    - -
    copy_file(self, infile, outfile, preserve_mode=1, preserve_times=1, link=None, level=1)
    Copy a file respecting verbose, dry-run and force flags.  (The
    -former two default to whatever is in the Distribution object, and
    -the latter defaults to false for commands that don't define it.)
    - -
    copy_tree(self, infile, outfile, preserve_mode=1, preserve_times=1, preserve_symlinks=0, level=1)
    Copy an entire directory tree respecting verbose, dry-run,
    -and force flags.
    - -
    debug_print(self, msg)
    Print 'msg' to stdout if the global DEBUG (taken from the
    -DISTUTILS_DEBUG environment variable) flag is true.
    - -
    dump_options(self, header=None, indent='')
    - -
    ensure_dirname(self, option)
    - -
    ensure_filename(self, option)
    Ensure that 'option' is the name of an existing file.
    - -
    ensure_finalized(self)
    - -
    ensure_string(self, option, default=None)
    Ensure that 'option' is a string; if not defined, set it to
    -'default'.
    - -
    ensure_string_list(self, option)
    Ensure that 'option' is a list of strings.  If 'option' is
    -currently a string, we split it either on /,\s*/ or /\s+/, so
    -"foo bar baz", "foo,bar,baz", and "foo,   bar baz" all become
    -["foo", "bar", "baz"].
    - -
    execute(self, func, args, msg=None, level=1)
    - -
    get_command_name(self)
    - -
    get_finalized_command(self, command, create=1)
    Wrapper around Distribution's 'get_command_obj()' method: find
    -(create if necessary and 'create' is true) the command object for
    -'command', call its 'ensure_finalized()' method, and return the
    -finalized command object.
    - -
    get_sub_commands(self)
    Determine the sub-commands that are relevant in the current
    -distribution (ie., that need to be run).  This is based on the
    -'sub_commands' class attribute: each tuple in that list may include
    -a method that we call to determine if the subcommand needs to be
    -run for the current distribution.  Return a list of command names.
    - -
    make_archive(self, base_name, format, root_dir=None, base_dir=None)
    - -
    make_file(self, infiles, outfile, func, args, exec_msg=None, skip_msg=None, level=1)
    Special case of 'execute()' for operations that process one or
    -more input files and generate one output file.  Works just like
    -'execute()', except the operation is skipped and a different
    -message printed if 'outfile' already exists and is newer than all
    -files listed in 'infiles'.  If the command defined 'self.force',
    -and it is true, then the command is unconditionally run -- does no
    -timestamp checks.
    - -
    mkpath(self, name, mode=511)
    - -
    move_file(self, src, dst, level=1)
    Move a file respecting the dry-run flag.
    - -
    reinitialize_command(self, command, reinit_subcommands=0)
    # XXX rename to 'get_reinitialized_command()'? (should do the
    -# same in dist.py, if so)
    - -
    run_command(self, command)
    Run some other command: uses the 'run_command()' method of
    -Distribution, which creates and finalizes the command object if
    -necessary and then invokes its 'run()' method.
    - -
    set_undefined_options(self, src_cmd, *option_pairs)
    Set the values of any "undefined" options from corresponding
    -option values in some other command object.  "Undefined" here means
    -"is None", which is the convention used to indicate that an option
    -has not been changed between 'initialize_options()' and
    -'finalize_options()'.  Usually called from 'finalize_options()' for
    -options that depend on some other command rather than another
    -option of the same command.  'src_cmd' is the other command from
    -which option values will be taken (a command object will be created
    -for it if necessary); the remaining arguments are
    -'(src_option,dst_option)' tuples which mean "take the value of
    -'src_option' in the 'src_cmd' command object, and copy it to
    -'dst_option' in the current command object".
    - -
    spawn(self, cmd, search_path=1, level=1)
    Spawn an external command respecting dry-run flag.
    - -
    warn(self, msg)
    - -
    -Data and other attributes inherited from Command:
    -
    sub_commands = []
    - -

    - - - - - - - -
     
    -class mx_build_ext(CompilerSupportMixin, build_ext)
       build_ext command which runs mx_autoconf command before
    -trying to build anything.
     
     
    Method resolution order:
    -
    mx_build_ext
    -
    CompilerSupportMixin
    -
    build_ext
    -
    Command
    -
    -
    -Methods defined here:
    -
    build_extension(self, ext)
    - -
    build_extensions(self)
    - -
    finalize_options(self)
    - -
    run(self)
    - -
    -Data and other attributes defined here:
    -
    autoconf = None
    - -
    disable_build = None
    - -
    enable_build = None
    - -
    user_options = [('build-lib=', 'b', 'directory for compiled extension modules'), ('build-temp=', 't', 'directory for temporary files (build by-products)'), ('inplace', 'i', 'ignore build-lib and put compiled extensions int...urce directory alongside your pure Python modules'), ('include-dirs=', 'I', "list of directories to search for header files (separated by ':')"), ('define=', 'D', 'C preprocessor macros to define'), ('undef=', 'U', 'C preprocessor macros to undefine'), ('libraries=', 'l', 'external C libraries to link with'), ('library-dirs=', 'L', "directories to search for external C libraries (separated by ':')"), ('rpath=', 'R', 'directories to search for shared C libraries at runtime'), ('link-objects=', 'O', 'extra explicit link objects to include in the link'), ('debug', 'g', 'compile/link with debugging information'), ('force', 'f', 'forcibly build everything (ignore file timestamps)'), ('compiler=', 'c', 'specify the compiler type'), ('swig-cpp', None, 'make SWIG create C++ files (default is C)'), ('swig-opts=', None, 'list of SWIG command line options'), ('swig=', None, 'path to the SWIG executable'), ('disable-build=', None, 'disable building an optional extensions (comma s...ed package names); default is to try building all'), ('enable-build=', None, 'if given, only these optional extensions are built (comma separated list of dotted package names)')]
    - -
    -Methods inherited from CompilerSupportMixin:
    -
    prepare_compiler(self)
    - -
    -Data and other attributes inherited from CompilerSupportMixin:
    -
    prepared_compiler = 0
    - -
    -Methods inherited from build_ext:
    -
    check_extensions_list(self, extensions)
    Ensure that the list of extensions (presumably provided as a
    -command option 'extensions') is valid, i.e. it is a list of
    -Extension objects.  We also support the old-style list of 2-tuples,
    -where the tuples are (ext_name, build_info), which are converted to
    -Extension instances here.

    -Raise DistutilsSetupError if the structure is invalid anywhere;
    -just returns otherwise.
    - -
    find_swig(self)
    Return the name of the SWIG executable.  On Unix, this is
    -just "swig" -- it should be in the PATH.  Tries a bit harder on
    -Windows.
    - -
    get_export_symbols(self, ext)
    Return the list of symbols that a shared extension has to
    -export.  This either uses 'ext.export_symbols' or, if it's not
    -provided, "init" + module_name.  Only relevant on Windows, where
    -the .pyd file (DLL) must export the module "init" function.
    - -
    get_ext_filename(self, ext_name)
    Convert the name of an extension (eg. "foo.bar") into the name
    -of the file from which it will be loaded (eg. "foo/bar.so", or
    -"foo\bar.pyd").
    - -
    get_ext_fullname(self, ext_name)
    - -
    get_libraries(self, ext)
    Return the list of libraries to link against when building a
    -shared extension.  On most platforms, this is just 'ext.libraries';
    -on Windows and OS/2, we add the Python library (eg. python20.dll).
    - -
    get_outputs(self)
    - -
    get_source_files(self)
    - -
    initialize_options(self)
    - -
    swig_sources(self, sources, extension)
    Walk the list of source files in 'sources', looking for SWIG
    -interface (.i) files.  Run SWIG on all that are found, and
    -return a modified 'sources' list with SWIG source files replaced
    -by the generated C (or C++) files.
    - -
    -Data and other attributes inherited from build_ext:
    -
    boolean_options = ['inplace', 'debug', 'force', 'swig-cpp']
    - -
    description = 'build C/C++ extensions (compile/link to build directory)'
    - -
    help_options = [('help-compiler', None, 'list available compilers', <function show_compilers>)]
    - -
    sep_by = " (separated by ':')"
    - -
    -Methods inherited from Command:
    -
    __getattr__(self, attr)
    - -
    __init__(self, dist)
    Create and initialize a new Command object.  Most importantly,
    -invokes the 'initialize_options()' method, which is the real
    -initializer and depends on the actual command being
    -instantiated.
    - -
    announce(self, msg, level=1)
    If the current verbosity level is greater than or equal to
    -'level', print 'msg' to stdout.
    - -
    copy_file(self, infile, outfile, preserve_mode=1, preserve_times=1, link=None, level=1)
    Copy a file respecting verbose, dry-run and force flags.  (The
    -former two default to whatever is in the Distribution object, and
    -the latter defaults to false for commands that don't define it.)
    - -
    copy_tree(self, infile, outfile, preserve_mode=1, preserve_times=1, preserve_symlinks=0, level=1)
    Copy an entire directory tree respecting verbose, dry-run,
    -and force flags.
    - -
    debug_print(self, msg)
    Print 'msg' to stdout if the global DEBUG (taken from the
    -DISTUTILS_DEBUG environment variable) flag is true.
    - -
    dump_options(self, header=None, indent='')
    - -
    ensure_dirname(self, option)
    - -
    ensure_filename(self, option)
    Ensure that 'option' is the name of an existing file.
    - -
    ensure_finalized(self)
    - -
    ensure_string(self, option, default=None)
    Ensure that 'option' is a string; if not defined, set it to
    -'default'.
    - -
    ensure_string_list(self, option)
    Ensure that 'option' is a list of strings.  If 'option' is
    -currently a string, we split it either on /,\s*/ or /\s+/, so
    -"foo bar baz", "foo,bar,baz", and "foo,   bar baz" all become
    -["foo", "bar", "baz"].
    - -
    execute(self, func, args, msg=None, level=1)
    - -
    get_command_name(self)
    - -
    get_finalized_command(self, command, create=1)
    Wrapper around Distribution's 'get_command_obj()' method: find
    -(create if necessary and 'create' is true) the command object for
    -'command', call its 'ensure_finalized()' method, and return the
    -finalized command object.
    - -
    get_sub_commands(self)
    Determine the sub-commands that are relevant in the current
    -distribution (ie., that need to be run).  This is based on the
    -'sub_commands' class attribute: each tuple in that list may include
    -a method that we call to determine if the subcommand needs to be
    -run for the current distribution.  Return a list of command names.
    - -
    make_archive(self, base_name, format, root_dir=None, base_dir=None)
    - -
    make_file(self, infiles, outfile, func, args, exec_msg=None, skip_msg=None, level=1)
    Special case of 'execute()' for operations that process one or
    -more input files and generate one output file.  Works just like
    -'execute()', except the operation is skipped and a different
    -message printed if 'outfile' already exists and is newer than all
    -files listed in 'infiles'.  If the command defined 'self.force',
    -and it is true, then the command is unconditionally run -- does no
    -timestamp checks.
    - -
    mkpath(self, name, mode=511)
    - -
    move_file(self, src, dst, level=1)
    Move a file respecting the dry-run flag.
    - -
    reinitialize_command(self, command, reinit_subcommands=0)
    # XXX rename to 'get_reinitialized_command()'? (should do the
    -# same in dist.py, if so)
    - -
    run_command(self, command)
    Run some other command: uses the 'run_command()' method of
    -Distribution, which creates and finalizes the command object if
    -necessary and then invokes its 'run()' method.
    - -
    set_undefined_options(self, src_cmd, *option_pairs)
    Set the values of any "undefined" options from corresponding
    -option values in some other command object.  "Undefined" here means
    -"is None", which is the convention used to indicate that an option
    -has not been changed between 'initialize_options()' and
    -'finalize_options()'.  Usually called from 'finalize_options()' for
    -options that depend on some other command rather than another
    -option of the same command.  'src_cmd' is the other command from
    -which option values will be taken (a command object will be created
    -for it if necessary); the remaining arguments are
    -'(src_option,dst_option)' tuples which mean "take the value of
    -'src_option' in the 'src_cmd' command object, and copy it to
    -'dst_option' in the current command object".
    - -
    spawn(self, cmd, search_path=1, level=1)
    Spawn an external command respecting dry-run flag.
    - -
    warn(self, msg)
    - -
    -Data and other attributes inherited from Command:
    -
    sub_commands = []
    - -

    - - - - - - - -
     
    -class mx_build_py(build_py)
       build_py command which also allows removing Python source code
    -after the byte-code compile process.
     
     
    Method resolution order:
    -
    mx_build_py
    -
    build_py
    -
    Command
    -
    -
    -Methods defined here:
    -
    get_outputs(self, include_bytecode=1)
    - -
    run(self)
    - -
    -Data and other attributes defined here:
    -
    boolean_options = ['compile', 'force', 'without-source']
    - -
    user_options = [('build-lib=', 'd', 'directory to "build" (copy) to'), ('compile', 'c', 'compile .py to .pyc'), ('no-compile', None, "don't compile .py files [default]"), ('optimize=', 'O', 'also compile with optimization: -O1 for "python ...r "python -OO", and -O0 to disable [default: -O0]'), ('force', 'f', 'forcibly build everything (ignore file timestamps)'), ('without-source', None, 'only include Python byte-code')]
    - -
    without_source = 0
    - -
    -Methods inherited from build_py:
    -
    build_module(self, module, module_file, package)
    - -
    build_modules(self)
    - -
    build_package_data(self)
    Copy data files into build directory
    - -
    build_packages(self)
    - -
    byte_compile(self, files)
    - -
    check_module(self, module, module_file)
    - -
    check_package(self, package, package_dir)
    - -
    finalize_options(self)
    - -
    find_all_modules(self)
    Compute the list of all modules that will be built, whether
    -they are specified one-module-at-a-time ('self.py_modules') or
    -by whole packages ('self.packages').  Return a list of tuples
    -(package, module, module_file), just like 'find_modules()' and
    -'find_package_modules()' do.
    - -
    find_data_files(self, package, src_dir)
    Return filenames for package's data files in 'src_dir'
    - -
    find_modules(self)
    Finds individually-specified Python modules, ie. those listed by
    -module name in 'self.py_modules'.  Returns a list of tuples (package,
    -module_base, filename): 'package' is a tuple of the path through
    -package-space to the module; 'module_base' is the bare (no
    -packages, no dots) module name, and 'filename' is the path to the
    -".py" file (relative to the distribution root) that implements the
    -module.
    - -
    find_package_modules(self, package, package_dir)
    - -
    get_data_files(self)
    Generate list of '(package,src_dir,build_dir,filenames)' tuples
    - -
    get_module_outfile(self, build_dir, package, module)
    - -
    get_package_dir(self, package)
    Return the directory, relative to the top of the source
    -distribution, where package 'package' should be found
    -(at least according to the 'package_dir' option, if any).
    - -
    get_source_files(self)
    - -
    initialize_options(self)
    - -
    -Data and other attributes inherited from build_py:
    -
    description = '"build" pure Python modules (copy to build directory)'
    - -
    negative_opt = {'no-compile': 'compile'}
    - -
    -Methods inherited from Command:
    -
    __getattr__(self, attr)
    - -
    __init__(self, dist)
    Create and initialize a new Command object.  Most importantly,
    -invokes the 'initialize_options()' method, which is the real
    -initializer and depends on the actual command being
    -instantiated.
    - -
    announce(self, msg, level=1)
    If the current verbosity level is greater than or equal to
    -'level', print 'msg' to stdout.
    - -
    copy_file(self, infile, outfile, preserve_mode=1, preserve_times=1, link=None, level=1)
    Copy a file respecting verbose, dry-run and force flags.  (The
    -former two default to whatever is in the Distribution object, and
    -the latter defaults to false for commands that don't define it.)
    - -
    copy_tree(self, infile, outfile, preserve_mode=1, preserve_times=1, preserve_symlinks=0, level=1)
    Copy an entire directory tree respecting verbose, dry-run,
    -and force flags.
    - -
    debug_print(self, msg)
    Print 'msg' to stdout if the global DEBUG (taken from the
    -DISTUTILS_DEBUG environment variable) flag is true.
    - -
    dump_options(self, header=None, indent='')
    - -
    ensure_dirname(self, option)
    - -
    ensure_filename(self, option)
    Ensure that 'option' is the name of an existing file.
    - -
    ensure_finalized(self)
    - -
    ensure_string(self, option, default=None)
    Ensure that 'option' is a string; if not defined, set it to
    -'default'.
    - -
    ensure_string_list(self, option)
    Ensure that 'option' is a list of strings.  If 'option' is
    -currently a string, we split it either on /,\s*/ or /\s+/, so
    -"foo bar baz", "foo,bar,baz", and "foo,   bar baz" all become
    -["foo", "bar", "baz"].
    - -
    execute(self, func, args, msg=None, level=1)
    - -
    get_command_name(self)
    - -
    get_finalized_command(self, command, create=1)
    Wrapper around Distribution's 'get_command_obj()' method: find
    -(create if necessary and 'create' is true) the command object for
    -'command', call its 'ensure_finalized()' method, and return the
    -finalized command object.
    - -
    get_sub_commands(self)
    Determine the sub-commands that are relevant in the current
    -distribution (ie., that need to be run).  This is based on the
    -'sub_commands' class attribute: each tuple in that list may include
    -a method that we call to determine if the subcommand needs to be
    -run for the current distribution.  Return a list of command names.
    - -
    make_archive(self, base_name, format, root_dir=None, base_dir=None)
    - -
    make_file(self, infiles, outfile, func, args, exec_msg=None, skip_msg=None, level=1)
    Special case of 'execute()' for operations that process one or
    -more input files and generate one output file.  Works just like
    -'execute()', except the operation is skipped and a different
    -message printed if 'outfile' already exists and is newer than all
    -files listed in 'infiles'.  If the command defined 'self.force',
    -and it is true, then the command is unconditionally run -- does no
    -timestamp checks.
    - -
    mkpath(self, name, mode=511)
    - -
    move_file(self, src, dst, level=1)
    Move a file respecting the dry-run flag.
    - -
    reinitialize_command(self, command, reinit_subcommands=0)
    # XXX rename to 'get_reinitialized_command()'? (should do the
    -# same in dist.py, if so)
    - -
    run_command(self, command)
    Run some other command: uses the 'run_command()' method of
    -Distribution, which creates and finalizes the command object if
    -necessary and then invokes its 'run()' method.
    - -
    set_undefined_options(self, src_cmd, *option_pairs)
    Set the values of any "undefined" options from corresponding
    -option values in some other command object.  "Undefined" here means
    -"is None", which is the convention used to indicate that an option
    -has not been changed between 'initialize_options()' and
    -'finalize_options()'.  Usually called from 'finalize_options()' for
    -options that depend on some other command rather than another
    -option of the same command.  'src_cmd' is the other command from
    -which option values will be taken (a command object will be created
    -for it if necessary); the remaining arguments are
    -'(src_option,dst_option)' tuples which mean "take the value of
    -'src_option' in the 'src_cmd' command object, and copy it to
    -'dst_option' in the current command object".
    - -
    spawn(self, cmd, search_path=1, level=1)
    Spawn an external command respecting dry-run flag.
    - -
    warn(self, msg)
    - -
    -Data and other attributes inherited from Command:
    -
    sub_commands = []
    - -

    - - - - - - - -
     
    -class mx_build_unixlib(Command)
       This command compiles external libs using the standard Unix
    -procedure for this:

    -./configure
    -make
     
     Methods defined here:
    -
    build_unixlib(self, unixlib)
    - -
    build_unixlibs(self, unixlibs)
    - -
    finalize_options(self)
    - -
    get_unixlib_lib_options(self)
    - -
    initialize_options(self)
    - -
    run(self)
    - -
    run_configure(self, options=[], dir=None, configure='configure')
    Run the configure script using the given options.

    -Options must be a list of tuples (optionname,
    -optionvalue).  If an option should not have a value,
    -passing None as optionvalue will have the effect of using
    -the option without value.

    -dir can be given to have the configure script execute in
    -that directory instead of the current one.
    - -
    run_make(self, targets=[], dir=None, make='make', options=[])
    Run the make command for the given targets.

    -Targets must be a list of valid Makefile targets.

    -dir can be given to have the make program execute in that
    -directory instead of the current one.
    - -
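    The option format described for run_configure() and run_make() above,
    shown as a short sketch; the directory name and configure switches are
    placeholders:

        # Sketch only: (optionname, optionvalue) tuples, None meaning "no value".
        configure_options = [
            ('--prefix', '/usr/local'),
            ('--with-threads', None),
        ]
        self.run_configure(configure_options, dir='libfoo')
        self.run_make(targets=['all'], dir='libfoo')
    - -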
    run_script(self, script, options=[])
    - -
    -Data and other attributes defined here:
    -
    boolean_options = ['force']
    - -
    description = 'build Unix libraries used by Python extensions'
    - -
    make = None
    - -
    user_options = [('build-lib=', 'b', 'directory to store built Unix libraries in'), ('build-temp=', 't', 'directory to build Unix libraries to'), ('make=', None, 'make program to use'), ('makefile=', None, 'makefile to use'), ('force', 'f', 'forcibly reconfigure')]
    - -
    -Methods inherited from Command:
    -
    __getattr__(self, attr)
    - -
    __init__(self, dist)
    Create and initialize a new Command object.  Most importantly,
    -invokes the 'initialize_options()' method, which is the real
    -initializer and depends on the actual command being
    -instantiated.
    - -
    announce(self, msg, level=1)
    If the current verbosity level is greater than or equal to
    -'level', print 'msg' to stdout.
    - -
    copy_file(self, infile, outfile, preserve_mode=1, preserve_times=1, link=None, level=1)
    Copy a file respecting verbose, dry-run and force flags.  (The
    -former two default to whatever is in the Distribution object, and
    -the latter defaults to false for commands that don't define it.)
    - -
    copy_tree(self, infile, outfile, preserve_mode=1, preserve_times=1, preserve_symlinks=0, level=1)
    Copy an entire directory tree respecting verbose, dry-run,
    -and force flags.
    - -
    debug_print(self, msg)
    Print 'msg' to stdout if the global DEBUG (taken from the
    -DISTUTILS_DEBUG environment variable) flag is true.
    - -
    dump_options(self, header=None, indent='')
    - -
    ensure_dirname(self, option)
    - -
    ensure_filename(self, option)
    Ensure that 'option' is the name of an existing file.
    - -
    ensure_finalized(self)
    - -
    ensure_string(self, option, default=None)
    Ensure that 'option' is a string; if not defined, set it to
    -'default'.
    - -
    ensure_string_list(self, option)
    Ensure that 'option' is a list of strings.  If 'option' is
    -currently a string, we split it either on /,\s*/ or /\s+/, so
    -"foo bar baz", "foo,bar,baz", and "foo,   bar baz" all become
    -["foo", "bar", "baz"].
    - -
    execute(self, func, args, msg=None, level=1)
    - -
    get_command_name(self)
    - -
    get_finalized_command(self, command, create=1)
    Wrapper around Distribution's 'get_command_obj()' method: find
    -(create if necessary and 'create' is true) the command object for
    -'command', call its 'ensure_finalized()' method, and return the
    -finalized command object.
    - -
    get_sub_commands(self)
    Determine the sub-commands that are relevant in the current
    -distribution (ie., that need to be run).  This is based on the
    -'sub_commands' class attribute: each tuple in that list may include
    -a method that we call to determine if the subcommand needs to be
    -run for the current distribution.  Return a list of command names.
    - -
    make_archive(self, base_name, format, root_dir=None, base_dir=None)
    - -
    make_file(self, infiles, outfile, func, args, exec_msg=None, skip_msg=None, level=1)
    Special case of 'execute()' for operations that process one or
    -more input files and generate one output file.  Works just like
    -'execute()', except the operation is skipped and a different
    -message printed if 'outfile' already exists and is newer than all
    -files listed in 'infiles'.  If the command defined 'self.force',
    -and it is true, then the command is unconditionally run -- does no
    -timestamp checks.
    - -
    mkpath(self, name, mode=511)
    - -
    move_file(self, src, dst, level=1)
    Move a file respecting the dry-run flag.
    - -
    reinitialize_command(self, command, reinit_subcommands=0)
    # XXX rename to 'get_reinitialized_command()'? (should do the
    -# same in dist.py, if so)
    - -
    run_command(self, command)
    Run some other command: uses the 'run_command()' method of
    -Distribution, which creates and finalizes the command object if
    -necessary and then invokes its 'run()' method.
    - -
    set_undefined_options(self, src_cmd, *option_pairs)
    Set the values of any "undefined" options from corresponding
    -option values in some other command object.  "Undefined" here means
    -"is None", which is the convention used to indicate that an option
    -has not been changed between 'initialize_options()' and
    -'finalize_options()'.  Usually called from 'finalize_options()' for
    -options that depend on some other command rather than another
    -option of the same command.  'src_cmd' is the other command from
    -which option values will be taken (a command object will be created
    -for it if necessary); the remaining arguments are
    -'(src_option,dst_option)' tuples which mean "take the value of
    -'src_option' in the 'src_cmd' command object, and copy it to
    -'dst_option' in the current command object".
    - -
    spawn(self, cmd, search_path=1, level=1)
    Spawn an external command respecting dry-run flag.
    - -
    warn(self, msg)
    - -
    -Data and other attributes inherited from Command:
    -
    sub_commands = []
    - -

    - - - - - - - -
     
    -class mx_install(install)
       We want install_data to default to install_purelib
    -if it is not given.
     
     
    Method resolution order:
    -
    mx_install
    -
    install
    -
    Command
    -
    -
    -Methods defined here:
    -
    ensure_finalized(self)
    - -
    finalize_options(self)
    - -
    -Methods inherited from install:
    -
    change_roots(self, *names)
    - -
    convert_paths(self, *names)
    - -
    create_path_file(self)
    - -
    dump_dirs(self, msg)
    - -
    expand_basedirs(self)
    - -
    expand_dirs(self)
    - -
    finalize_other(self)
    - -
    finalize_unix(self)
    - -
    get_inputs(self)
    - -
    get_outputs(self)
    - -
    handle_extra_path(self)
    - -
    has_data(self)
    - -
    has_headers(self)
    - -
    has_lib(self)
    Return true if the current distribution has any Python
    -modules to install.
    - -
    has_scripts(self)
    - -
    initialize_options(self)
    - -
    run(self)
    - -
    select_scheme(self, name)
    - -
    -Data and other attributes inherited from install:
    -
    boolean_options = ['compile', 'force', 'skip-build']
    - -
    description = 'install everything from build directory'
    - -
    negative_opt = {'no-compile': 'compile'}
    - -
    sub_commands = [('install_lib', <function has_lib>), ('install_headers', <function has_headers>), ('install_scripts', <function has_scripts>), ('install_data', <function has_data>)]
    - -
    user_options = [('prefix=', None, 'installation prefix'), ('exec-prefix=', None, '(Unix only) prefix for platform-specific files'), ('home=', None, '(Unix only) home directory to install under'), ('install-base=', None, 'base installation directory (instead of --prefix or --home)'), ('install-platbase=', None, 'base installation directory for platform-specific files (instead of --exec-prefix or --home)'), ('root=', None, 'install everything relative to this alternate root directory'), ('install-purelib=', None, 'installation directory for pure Python module distributions'), ('install-platlib=', None, 'installation directory for non-pure module distributions'), ('install-lib=', None, 'installation directory for all module distributi...verrides --install-purelib and --install-platlib)'), ('install-headers=', None, 'installation directory for C/C++ headers'), ('install-scripts=', None, 'installation directory for Python scripts'), ('install-data=', None, 'installation directory for data files'), ('compile', 'c', 'compile .py to .pyc [default]'), ('no-compile', None, "don't compile .py files"), ('optimize=', 'O', 'also compile with optimization: -O1 for "python ...r "python -OO", and -O0 to disable [default: -O0]'), ('force', 'f', 'force installation (overwrite any existing files)'), ('skip-build', None, 'skip rebuilding everything (for testing/debugging)'), ('record=', None, 'filename in which to record list of installed files')]
    - -
    -Methods inherited from Command:
    -
    __getattr__(self, attr)
    - -
    __init__(self, dist)
    Create and initialize a new Command object.  Most importantly,
    -invokes the 'initialize_options()' method, which is the real
    -initializer and depends on the actual command being
    -instantiated.
    - -
    announce(self, msg, level=1)
    If the current verbosity level is greater than or equal to
    -'level', print 'msg' to stdout.
    - -
    copy_file(self, infile, outfile, preserve_mode=1, preserve_times=1, link=None, level=1)
    Copy a file respecting verbose, dry-run and force flags.  (The
    -former two default to whatever is in the Distribution object, and
    -the latter defaults to false for commands that don't define it.)
    - -
    copy_tree(self, infile, outfile, preserve_mode=1, preserve_times=1, preserve_symlinks=0, level=1)
    Copy an entire directory tree respecting verbose, dry-run,
    -and force flags.
    - -
    debug_print(self, msg)
    Print 'msg' to stdout if the global DEBUG (taken from the
    -DISTUTILS_DEBUG environment variable) flag is true.
    - -
    dump_options(self, header=None, indent='')
    - -
    ensure_dirname(self, option)
    - -
    ensure_filename(self, option)
    Ensure that 'option' is the name of an existing file.
    - -
    ensure_string(self, option, default=None)
    Ensure that 'option' is a string; if not defined, set it to
    -'default'.
    - -
    ensure_string_list(self, option)
    Ensure that 'option' is a list of strings.  If 'option' is
    -currently a string, we split it either on /,\s*/ or /\s+/, so
    -"foo bar baz", "foo,bar,baz", and "foo,   bar baz" all become
    -["foo", "bar", "baz"].
    - -
    execute(self, func, args, msg=None, level=1)
    - -
    get_command_name(self)
    - -
    get_finalized_command(self, command, create=1)
    Wrapper around Distribution's 'get_command_obj()' method: find
    -(create if necessary and 'create' is true) the command object for
    -'command', call its 'ensure_finalized()' method, and return the
    -finalized command object.
    - -
    get_sub_commands(self)
    Determine the sub-commands that are relevant in the current
    -distribution (i.e., that need to be run).  This is based on the
    -'sub_commands' class attribute: each tuple in that list may include
    -a method that we call to determine if the subcommand needs to be
    -run for the current distribution.  Return a list of command names.
    - -
    make_archive(self, base_name, format, root_dir=None, base_dir=None)
    - -
    make_file(self, infiles, outfile, func, args, exec_msg=None, skip_msg=None, level=1)
    Special case of 'execute()' for operations that process one or
    -more input files and generate one output file.  Works just like
    -'execute()', except the operation is skipped and a different
    -message printed if 'outfile' already exists and is newer than all
    -files listed in 'infiles'.  If the command defined 'self.force',
    -and it is true, then the operation is run unconditionally, with no
    -timestamp checks.
    - -
    mkpath(self, name, mode=511)
    - -
    move_file(self, src, dst, level=1)
    Move a file respecting the dry-run flag.
    - -
    reinitialize_command(self, command, reinit_subcommands=0)
    # XXX rename to 'get_reinitialized_command()'? (should do the
    -# same in dist.py, if so)
    - -
    run_command(self, command)
    Run some other command: uses the 'run_command()' method of
    -Distribution, which creates and finalizes the command object if
    -necessary and then invokes its 'run()' method.
    - -
    set_undefined_options(self, src_cmd, *option_pairs)
    Set the values of any "undefined" options from corresponding
    -option values in some other command object.  "Undefined" here means
    -"is None", which is the convention used to indicate that an option
    -has not been changed between 'initialize_options()' and
    -'finalize_options()'.  Usually called from 'finalize_options()' for
    -options that depend on some other command rather than another
    -option of the same command.  'src_cmd' is the other command from
    -which option values will be taken (a command object will be created
    -for it if necessary); the remaining arguments are
    -'(src_option,dst_option)' tuples which mean "take the value of
    -'src_option' in the 'src_cmd' command object, and copy it to
    -'dst_option' in the current command object".
    - -
    spawn(self, cmd, search_path=1, level=1)
    Spawn an external command respecting dry-run flag.
    - -
    warn(self, msg)
    - -

    - - - - - - - -
     
    -class mx_install_data(install_data)
       Rework the install_data command to something more useful.
     
     
    Method resolution order:
    -
    mx_install_data
    -
    install_data
    -
    Command
    -
    -
    -Methods defined here:
    -
    finalize_options(self)
    - -
    run(self)
    - -
    -Methods inherited from install_data:
    -
    get_inputs(self)
    - -
    get_outputs(self)
    - -
    initialize_options(self)
    - -
    -Data and other attributes inherited from install_data:
    -
    boolean_options = ['force']
    - -
    description = 'install data files'
    - -
    user_options = [('install-dir=', 'd', 'base directory for installing data files (default: installation base dir)'), ('root=', None, 'install everything relative to this alternate root directory'), ('force', 'f', 'force installation (overwrite existing files)')]
    - -
    -Methods inherited from Command:
    -
    __getattr__(self, attr)
    - -
    __init__(self, dist)
    Create and initialize a new Command object.  Most importantly,
    -invokes the 'initialize_options()' method, which is the real
    -initializer and depends on the actual command being
    -instantiated.
    - -
    announce(self, msg, level=1)
    If the current verbosity level is greater than or equal to
    -'level', print 'msg' to stdout.
    - -
    copy_file(self, infile, outfile, preserve_mode=1, preserve_times=1, link=None, level=1)
    Copy a file respecting verbose, dry-run and force flags.  (The
    -former two default to whatever is in the Distribution object, and
    -the latter defaults to false for commands that don't define it.)
    - -
    copy_tree(self, infile, outfile, preserve_mode=1, preserve_times=1, preserve_symlinks=0, level=1)
    Copy an entire directory tree respecting verbose, dry-run,
    -and force flags.
    - -
    debug_print(self, msg)
    Print 'msg' to stdout if the global DEBUG (taken from the
    -DISTUTILS_DEBUG environment variable) flag is true.
    - -
    dump_options(self, header=None, indent='')
    - -
    ensure_dirname(self, option)
    - -
    ensure_filename(self, option)
    Ensure that 'option' is the name of an existing file.
    - -
    ensure_finalized(self)
    - -
    ensure_string(self, option, default=None)
    Ensure that 'option' is a string; if not defined, set it to
    -'default'.
    - -
    ensure_string_list(self, option)
    Ensure that 'option' is a list of strings.  If 'option' is
    -currently a string, we split it either on /,\s*/ or /\s+/, so
    -"foo bar baz", "foo,bar,baz", and "foo,   bar baz" all become
    -["foo", "bar", "baz"].
    - -
    execute(self, func, args, msg=None, level=1)
    - -
    get_command_name(self)
    - -
    get_finalized_command(self, command, create=1)
    Wrapper around Distribution's 'get_command_obj()' method: find
    -(create if necessary and 'create' is true) the command object for
    -'command', call its 'ensure_finalized()' method, and return the
    -finalized command object.
    - -
    get_sub_commands(self)
    Determine the sub-commands that are relevant in the current
    -distribution (i.e., that need to be run).  This is based on the
    -'sub_commands' class attribute: each tuple in that list may include
    -a method that we call to determine if the subcommand needs to be
    -run for the current distribution.  Return a list of command names.
    - -
    make_archive(self, base_name, format, root_dir=None, base_dir=None)
    - -
    make_file(self, infiles, outfile, func, args, exec_msg=None, skip_msg=None, level=1)
    Special case of 'execute()' for operations that process one or
    -more input files and generate one output file.  Works just like
    -'execute()', except the operation is skipped and a different
    -message printed if 'outfile' already exists and is newer than all
    -files listed in 'infiles'.  If the command defined 'self.force',
    -and it is true, then the operation is run unconditionally, with no
    -timestamp checks.
    - -
    mkpath(self, name, mode=511)
    - -
    move_file(self, src, dst, level=1)
    Move a file respecting the dry-run flag.
    - -
    reinitialize_command(self, command, reinit_subcommands=0)
    # XXX rename to 'get_reinitialized_command()'? (should do the
    -# same in dist.py, if so)
    - -
    run_command(self, command)
    Run some other command: uses the 'run_command()' method of
    -Distribution, which creates and finalizes the command object if
    -necessary and then invokes its 'run()' method.
    - -
    set_undefined_options(self, src_cmd, *option_pairs)
    Set the values of any "undefined" options from corresponding
    -option values in some other command object.  "Undefined" here means
    -"is None", which is the convention used to indicate that an option
    -has not been changed between 'initialize_options()' and
    -'finalize_options()'.  Usually called from 'finalize_options()' for
    -options that depend on some other command rather than another
    -option of the same command.  'src_cmd' is the other command from
    -which option values will be taken (a command object will be created
    -for it if necessary); the remaining arguments are
    -'(src_option,dst_option)' tuples which mean "take the value of
    -'src_option' in the 'src_cmd' command object, and copy it to
    -'dst_option' in the current command object".
    - -
    spawn(self, cmd, search_path=1, level=1)
    Spawn an external command respecting dry-run flag.
    - -
    warn(self, msg)
    - -
    -Data and other attributes inherited from Command:
    -
    sub_commands = []
    - -

    - - - - - -
     
    -class mx_uninstall(Command)
        Methods defined here:
    -
    finalize_options(self)
    - -
    initialize_options(self)
    - -
    run(self)
    - -
    -Data and other attributes defined here:
    -
    description = 'uninstall the package files and directories'
    - -
    user_options = []
    - -
    -Methods inherited from Command:
    -
    __getattr__(self, attr)
    - -
    __init__(self, dist)
    Create and initialize a new Command object.  Most importantly,
    -invokes the 'initialize_options()' method, which is the real
    -initializer and depends on the actual command being
    -instantiated.
    - -
    announce(self, msg, level=1)
    If the current verbosity level is greater than or equal to
    -'level', print 'msg' to stdout.
    - -
    copy_file(self, infile, outfile, preserve_mode=1, preserve_times=1, link=None, level=1)
    Copy a file respecting verbose, dry-run and force flags.  (The
    -former two default to whatever is in the Distribution object, and
    -the latter defaults to false for commands that don't define it.)
    - -
    copy_tree(self, infile, outfile, preserve_mode=1, preserve_times=1, preserve_symlinks=0, level=1)
    Copy an entire directory tree respecting verbose, dry-run,
    -and force flags.
    - -
    debug_print(self, msg)
    Print 'msg' to stdout if the global DEBUG (taken from the
    -DISTUTILS_DEBUG environment variable) flag is true.
    - -
    dump_options(self, header=None, indent='')
    - -
    ensure_dirname(self, option)
    - -
    ensure_filename(self, option)
    Ensure that 'option' is the name of an existing file.
    - -
    ensure_finalized(self)
    - -
    ensure_string(self, option, default=None)
    Ensure that 'option' is a string; if not defined, set it to
    -'default'.
    - -
    ensure_string_list(self, option)
    Ensure that 'option' is a list of strings.  If 'option' is
    -currently a string, we split it either on /,\s*/ or /\s+/, so
    -"foo bar baz", "foo,bar,baz", and "foo,   bar baz" all become
    -["foo", "bar", "baz"].
    - -
    execute(self, func, args, msg=None, level=1)
    - -
    get_command_name(self)
    - -
    get_finalized_command(self, command, create=1)
    Wrapper around Distribution's 'get_command_obj()' method: find
    -(create if necessary and 'create' is true) the command object for
    -'command', call its 'ensure_finalized()' method, and return the
    -finalized command object.
    - -
    get_sub_commands(self)
    Determine the sub-commands that are relevant in the current
    -distribution (i.e., that need to be run).  This is based on the
    -'sub_commands' class attribute: each tuple in that list may include
    -a method that we call to determine if the subcommand needs to be
    -run for the current distribution.  Return a list of command names.
    - -
    make_archive(self, base_name, format, root_dir=None, base_dir=None)
    - -
    make_file(self, infiles, outfile, func, args, exec_msg=None, skip_msg=None, level=1)
    Special case of 'execute()' for operations that process one or
    -more input files and generate one output file.  Works just like
    -'execute()', except the operation is skipped and a different
    -message printed if 'outfile' already exists and is newer than all
    -files listed in 'infiles'.  If the command defined 'self.force',
    -and it is true, then the operation is run unconditionally, with no
    -timestamp checks.
    - -
    mkpath(self, name, mode=511)
    - -
    move_file(self, src, dst, level=1)
    Move a file respecting the dry-run flag.
    - -
    reinitialize_command(self, command, reinit_subcommands=0)
    # XXX rename to 'get_reinitialized_command()'? (should do the
    -# same in dist.py, if so)
    - -
    run_command(self, command)
    Run some other command: uses the 'run_command()' method of
    -Distribution, which creates and finalizes the command object if
    -necessary and then invokes its 'run()' method.
    - -
    set_undefined_options(self, src_cmd, *option_pairs)
    Set the values of any "undefined" options from corresponding
    -option values in some other command object.  "Undefined" here means
    -"is None", which is the convention used to indicate that an option
    -has not been changed between 'initialize_options()' and
    -'finalize_options()'.  Usually called from 'finalize_options()' for
    -options that depend on some other command rather than another
    -option of the same command.  'src_cmd' is the other command from
    -which option values will be taken (a command object will be created
    -for it if necessary); the remaining arguments are
    -'(src_option,dst_option)' tuples which mean "take the value of
    -'src_option' in the 'src_cmd' command object, and copy it to
    -'dst_option' in the current command object".
    - -
    spawn(self, cmd, search_path=1, level=1)
    Spawn an external command respecting dry-run flag.
    - -
    warn(self, msg)
    - -
    -Data and other attributes inherited from Command:
    -
    sub_commands = []
    - -

    - - - - - -
     
    -Functions
           
    add_dir(dir, pathlist, index=-1)
    -
    build_path(dirs)
    Builds a path list from a list of directories/paths.

    -The dirs list may contain shell variable references and user
    -dir references. These will get expanded
    -automatically. Non-existing shell variables are replaced with
    -an empty string. Path entries will get expanded to single
    -directory entries.  Empty string entries are removed from the
    -list.
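    A rough Python sketch of the behaviour described above (illustrative only, not the mxSetup
    implementation):

        import os

        def build_path_sketch(dirs):
            result = []
            for entry in dirs:
                entry = os.path.expanduser(os.path.expandvars(entry))
                # Note: the real build_path() is documented to replace non-existing
                # shell variables with an empty string; expandvars() leaves them as-is.
                for d in entry.split(os.pathsep):   # split path entries into single directories
                    if d:                           # drop empty entries
                        result.append(d)
            return result

        print(build_path_sketch(['~/lib', '$HOME/lib' + os.pathsep + '/usr/local/lib', '']))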
    -
    find_file(filename, paths, pattern=None)
    Look for a file in the directories defined in the list
    -paths.

    -If pattern is given, the found files are additionally checked
    -to contain the given RE search pattern. Pattern matching is
    -done case-insensitively by default.

    -Returns the directory where the file can be found or None in
    -case it was not found.
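    The described lookup amounts to something like the following sketch (illustrative only; the
    real function may differ in details such as encoding handling):

        import os, re

        def find_file_sketch(filename, paths, pattern=None):
            for directory in paths:
                candidate = os.path.join(directory, filename)
                if not os.path.exists(candidate):
                    continue
                if pattern is not None:
                    data = open(candidate, 'rb').read().decode('latin-1', 'replace')
                    if not re.search(pattern, data, re.IGNORECASE):
                        continue                     # file found, but pattern not present
                return directory                     # directory containing the file
            return None

        print(find_file_sketch('zlib.h', ['/usr/include', '/usr/local/include'], 'ZLIB_VERSION'))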
    -
    get_msvc_paths()
    Return a tuple (libpath, inclpath) defining the search
    -paths for library files and include files that the MS VC++
    -compiler uses by default.

    -Both entries are lists of directories.

    -Only available on Windows platforms with installed compiler.
    -
    mx_customize_compiler(compiler)
    -
    mx_make_tarball(base_name, base_dir, compression='gzip', verbose=0, dry_run=0, tar_options='-chf')
    -
    mx_msvccompiler__init__(self, *args, **kws)
    -
    run_setup(configurations)
    Run distutils setup.

    -The parameters passed to setup() are extracted from the list
    -of modules, classes or instances given in configurations.

    -Names with leading underscore are removed from the parameters.
    -Parameters which are not strings, lists or tuples are removed
    -as well.  Configurations which occur later in the
    -configurations list override settings of configurations
    -earlier in the list.
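    The parameter extraction described here can be approximated as follows (a simplified sketch;
    the configuration class is invented for illustration):

        def extract_setup_params(configurations):
            params = {}
            for cfg in configurations:               # later configurations override earlier ones
                for name in dir(cfg):
                    if name.startswith('_'):
                        continue                      # leading-underscore names are dropped
                    value = getattr(cfg, name)
                    if isinstance(value, (str, list, tuple)):
                        params[name] = value          # only strings, lists and tuples are kept
            return params

        class ExampleConfiguration:
            name = 'example-package'
            version = '1.0'
            packages = ['example']
            _private = 'not passed to setup()'

        print(extract_setup_params([ExampleConfiguration]))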
    -
    verify_path(path)
    Verify that the directories in path exist and are in fact
    -directories.

    -Also removes duplicates from the list.
    -

    - - - - - -
     
    -Data
           FINDINCLPATH = []
    -FINDLIBPATH = []
    -GenPPD = None
    -INCLPATH = ['/usr/include']
    -LIBPATH = ['/usr/local/lib', '/usr/lib']
    -STDINCLPATH = ['/usr/include']
    -STDLIBPATH = ['/usr/lib']
    -_debug = 0
    -bdist_ppm = None
    -compression_programs = {'bzip2': ('.bz2', 'f9'), 'compress': ('.Z', '-f'), 'gzip': ('.gz', '-f9')}
    - \ No newline at end of file
diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.simpleparsegrammar.html simpleparse-2.2.0/doc/pydoc/simpleparse.simpleparsegrammar.html
--- simpleparse-2.1.0a1/doc/pydoc/simpleparse.simpleparsegrammar.html 2006-02-19 01:05:14.000000000 +0000
+++ simpleparse-2.2.0/doc/pydoc/simpleparse.simpleparsegrammar.html 1970-01-01 00:00:00.000000000 +0000
@@ -1,469 +0,0 @@
    -Python: module simpleparse.simpleparsegrammar
     
    - 
    simpleparse.simpleparsegrammar
    index
    /home/mcfletch/pylive/simpleparse/simpleparsegrammar.py
    -

    Default SimpleParse EBNF grammar as a generator with productions

    -This module defines the original SimpleParse
    -grammar.  It uses the generator objects directly
    -as this is the first grammar being written.

    -

    - - - - - -
     
    -Modules
           
    simpleparse.baseparser
    -copy
    -
    simpleparse.generator
    -simpleparse.stt.TextTools.mxTextTools.mxTextTools
    -
    string
    -time
    -
    types
    -

    - - - - - -
     
    -Classes
           
    -
    BaseParser -
    -
    -
    Parser -
    -
    -
    DispatchProcessor(Processor) -
    -
    -
    SPGrammarProcessor -
    -
    -
    -

    - - - - - - - -
     
    -class Parser(BaseParser)
       Parser which generates new parsers from EBNF grammars

    -This parser class allows you to pass in an EBNF grammar as
    -the initialisation parameter.  The EBNF is processed, and a
    -SimpleParse generator object is created as self.generator.

    -Unlike most Parsers, this object is intended to be re-created
    -for each bit of data it parses (i.e. each EBNF), so it warps
    -the standard API a lot.
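    In practice most users reach this machinery through simpleparse.parser.Parser, which builds
    on the classes documented here; a small usage example (the grammar is illustrative):

        from simpleparse.parser import Parser

        declaration = r'''
        ints := int, (ts, int)*
        int  := [0-9]+
        ts   := [ \t]+
        '''
        parser = Parser(declaration, 'ints')
        success, children, nextcharacter = parser.parse('12 345 6')
        print(success, nextcharacter)
        for tagname, start, stop, subtags in children:
            print(tagname, start, stop)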
     
     Methods defined here:
    -
    __init__(self, ebnf, prebuilts=(), methodSource=None, definitionSources=())
    Create a new generator based on the EBNF in simpleparse format
    - -
    buildTagger(self, name=None, processor=None)
    Build the tag-table for parsing the EBNF for this parser
    - -
    -Methods inherited from BaseParser:
    -
    buildProcessor(self)
    Build default processor object for this parser class

    -The default implementation returns None.  The processor
    -can either implement the "method source" API (just provides
    -information about Callouts and the like), or the processor
    -API and the method-source API.  The processor API merely
    -requires that the object be callable, and have the signature:

    -        object( (success, children, nextPosition), buffer)

    -(Note: your object can treat the first item as a single tuple
    -if it likes).

    -See: simpleparse.processor module for details.
    - -
    parse(self, data, production=None, processor=None, start=0, stop=None)
    Parse data with production "production" of this parser

    -data -- data to be parsed, a Python string, for now
    -production -- optional string specifying a non-default production to use
    -        for parsing data
    -processor -- optional pointer to a Processor or MethodSource object for
    -        use in determining reporting format and/or post-processing the results
    -        of the parsing pass.  Can be None if neither is desired (default)
    -start -- starting index for the parsing, default 0
    -stop -- stopping index for the parsing, default len(data)
    - -
    resetBeforeParse(self)
    Called just before the parser's parse method starts working.

    -Allows you to set up special-purpose structures, such as stacks
    -or local storage values.  There is no base implementation.  The
    -base implementation does nothing.
    - -

    - - - - - - - -
     
    -class SPGrammarProcessor(DispatchProcessor)
       Processing object for post-processing an EBNF into a new generator
     
     
    Method resolution order:
    -
    SPGrammarProcessor
    -
    DispatchProcessor
    -
    Processor
    -
    MethodSource
    -
    -
    -Methods defined here:
    -
    CHAR = CHARNODBLQUOTE(self, tup, buffer)
    - -
    CHARBRACE(self, tup, buffer)
    - -
    CHARDASH(self, tup, buffer)
    - -
    CHARNOBRACE(self, (tag, left, right, sublist), buffer)
    - -
    CHARNODBLQUOTE(self, tup, buffer)
    - -
    CHARNOSNGLQUOTE = CHARNODBLQUOTE(self, tup, buffer)
    - -
    CHARRANGE(self, (tag, left, right, sublist), buffer)
    Create a string from first to second item
    - -
    ESCAPEDCHAR(self, (tag, left, right, sublist), buffer)
    - -
    HEXESCAPEDCHAR(self, tup, buffer)
    - -
    OCTALESCAPEDCHAR(self, tup, buffer)
    - -
    SPECIALESCAPEDCHAR(self, tup, buffer)
    - -
    UNICODEESCAPEDCHAR_16(self, (tag, left, right, sublist), buffer)
    Only available in unicode-aware Python versions
    - -
    UNICODEESCAPEDCHAR_32 = UNICODEESCAPEDCHAR_16(self, (tag, left, right, sublist), buffer)
    - -
    __init__(self, prebuilts=(), definitionSources=())
    Create a new generator based on the EBNF in simpleparse format
    - -
    declaration(self, (tag, left, right, sublist), buffer)
    Base declaration from the grammar, a "production" or "rule"
    - -
    element_token(self, (tag, left, right, sublist), buffer)
    get the children, then configure
    - -
    error_on_fail(self, (tag, left, right, children), buffer)
    If present, we are going to make the current object an errorOnFail type.

    -If there's a string literal child, then we use it to create the
    -"message" attribute of the errorOnFail object.
    - -
    fo_group(self, (tag, left, right, sublist), buffer)
    Process a first-of-group into a FirstOf element token
    - -
    literal(self, (tag, left, right, sublist), buffer)
    Turn a literal result into a literal generator
    - -
    lookahead_indicator(self, tup, buffer)
    If present, the lookahead indicator just says "yes", so just return 1
    - -
    name(self, tup, buffer)
    - -
    negpos_indicator(self, tup, buffer)
    Return whether the indicator marks the element as negative
    - -
    occurence_indicator(self, tup, buffer)
    Return (optional, repeating) as a tuple of true/false values
    - -
    range(self, (tag, left, right, sublist), buffer)
    - -
    seq_group(self, (tag, left, right, sublist), buffer)
    Process a sequential-group into a SequentialGroup element token
    - -
    -Data and other attributes defined here:
    -
    negposIndicatorMap = {'+': 0, '-': 1}
    - -
    occurenceIndicatorMap = {'*': (1, 1), '+': (0, 1), '?': (1, 0)}
    - -
    specialescapedmap = {'"': '"', "'": "'", r'\': r'\', 'a': '\x07', 'b': '\x08', 'f': '\x0c', 'n': '\n', 'r': '\r', 't': '\t', 'v': '\x0b'}
    - -
    -Methods inherited from DispatchProcessor:
    -
    __call__(self, value, buffer)
    Process the results of the parsing run over buffer

    -Value can either be: (success, tags, next) for a top-level
    -production, or (tag, left, right, children) for a non-top
    -production.
    - -
    -Methods inherited from Processor:
    -
    __repr__(self)
    Return a representation of the class
    - -

    - - - - - -
     
    -Functions
           
    BMS = TextSearch(...)
    TextSearch(match[,translate=None,algorithm=default_algorithm])

    -Create a substring search object for the string match;
    -translate is an optional translate-string like the one used
    -in the module re.
    -
    CharSet(...)
    CharSet(definition)

    -Create a character set matching object from the string
    -
    FS = TextSearch(...)
    TextSearch(match[,translate=None,algorithm=default_algorithm])

    -Create a substring search object for the string match;
    -translate is an optional translate-string like the one used
    -in the module re.
    -
    FSType = TextSearch(...)
    TextSearch(match[,translate=None,algorithm=default_algorithm])

    -Create a substring search object for the string match;
    -translate is an optional translate-string like the one used
    -in the module re.
    -
    TagTable(...)
    TagTable(definition[,cachable=1])
    -
    TextSearch(...)
    TextSearch(match[,translate=None,algorithm=default_algorithm])

    -Create a substring search object for the string match;
    -translate is an optional translate-string like the one used
    -in the module re.
    -
    UnicodeTagTable(...)
    TagTable(definition[,cachable=1])
    -
    charsplit(...)
    charsplit(text,char,start=0,stop=len(text))

    -Split text[start:stop] into substrings at char and
    -return the result as list of strings.
    -
    cmp(...)
    cmp(a,b)

    -Compare two valid taglist tuples w/r to their slice
    -position; this is useful for sorting joinlists.
    -
    hex2str(...)
    hex2str(text)

    -Return text interpreted as two byte HEX values converted
    -to a string.
    -
    isascii(...)
    isascii(text,start=0,stop=len(text))

    -Return 1/0 depending on whether text only contains ASCII
    -characters.
    -
    join(...)
    join(joinlist,sep='',start=0,stop=len(joinlist))

    -Copy snippets from different strings together producing a
    -new string
    -The first argument must be a list of tuples or strings;
    -tuples must be of the form (string,l,r[,...]) and turn out
    -as string[l:r]
    -NOTE: the syntax used for negative slices is different
    -than the Python standard: -1 corresponds to the first
    -character *after* the string, e.g. ('Example',0,-1) gives
    -'Example' and not 'Exampl', like in Python
    -sep is an optional separator string, start and stop
    -define the slice of joinlist that is taken into account.
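    For example (using the built-in copy of mxTextTools shipped with SimpleParse):

        from simpleparse.stt.TextTools.TextTools import join

        text = 'Hello World'
        # (string, l, r) tuples become string[l:r]; plain strings are copied as-is.
        print(join([(text, 0, 5), ', ', (text, 6, 11), '!']))   # -> Hello, World!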
    -
    joinlist(...)
    joinlist(text,list,start=0,stop=len(text))

    -Takes a list of tuples (replacement,l,r,...) and produces
    -a taglist suitable for join() which creates a copy
    -of text where every slice [l:r] is replaced by the
    -given replacement
    -- the list must be sorted using cmp() as compare function
    -- it may not contain overlapping slices
    -- the slices may not contain negative indices
    -- if the taglist cannot contain overlapping slices, you can
    -  give this function the taglist produced by tag() directly
    -  (sorting is not needed, as the list will already be sorted)
    -- start and stop set the slice to work in, i.e. text[start:stop]
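    Combined with join(), a single replacement might look like this (illustrative):

        from simpleparse.stt.TextTools.TextTools import join, joinlist

        text = 'Hello World'
        # Replace the slice [6:11] ('World') with 'Python' in a copy of text.
        print(join(joinlist(text, [('Python', 6, 11)])))   # -> Hello Python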
    -
    lower(...)
    lower(text)

    -Return text converted to lower case.
    -
    prefix(...)
    prefix(text,prefixes,start=0,stop=len(text)[,translate])

    -Looks at text[start:stop] and returns the first matching
    -prefix out of the tuple of strings given in prefixes.
    -If no prefix is found to be matching, None is returned.
    -The optional 256 char translate string is used to translate
    -the text prior to comparing it with the given prefixes.
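    For example:

        from simpleparse.stt.TextTools.TextTools import prefix

        # 'fool' does not match 'foobar', 'foo' does, so 'foo' is returned.
        print(prefix('foobar', ('fool', 'foo', 'f')))   # -> foo
        print(prefix('spam',   ('foo', 'bar')))         # -> None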
    -
    set(...)
    set(string,logic=1)

    -Returns a character set for string: a bit encoded version
    -of the characters occurring in string.
    -- logic can be set to 0 if all characters *not* in string
    -  should go into the set
    -
    setfind(...)
    setfind(text,set,start=0,stop=len(text))

    -Find the first occurrence of any character from set in
    -text[start:stop]
    - set must be a string obtained with set()
    -DEPRECATED: use CharSet().search() instead.
    -
    setsplit(...)
    setsplit(text,set,start=0,stop=len(text))

    -Split text[start:stop] into substrings using set,
    -omitting the splitting parts and empty substrings.
    -set must be a string obtained from set()
    -DEPRECATED: use CharSet().split() instead.
    -
    setsplitx(...)
    setsplitx(text,set,start=0,stop=len(text))

    -Split text[start:stop] into substrings using set, so
    -that every second entry consists only of characters in set.
    -set must be a string obtained with set()
    -DEPRECATED: use CharSet().splitx() instead.
    -
    setstrip(...)
    setstrip(text,set,start=0,stop=len(text),mode=0)

    -Strip all characters in text[start:stop] appearing in set.
    -mode indicates where to strip (<0: left; =0: left and right;
    ->0: right). set must be a string obtained with set()
    -DEPRECATED: use CharSet().strip() instead.
    -
    splitat(...)
    splitat(text,char,nth=1,start=0,stop=len(text))

    -Split text[start:stop] into two substrings at the nth
    -occurrence of char and return the result as a 2-tuple. If the
    -character is not found, the second string is empty. nth may
    -be negative: the search is then done from the right and the
    -first string is empty in case the character is not found.
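    For example:

        from simpleparse.stt.TextTools.TextTools import splitat

        print(splitat('key=value=rest', '='))       # -> ('key', 'value=rest')
        print(splitat('key=value=rest', '=', 2))    # -> ('key=value', 'rest')
        print(splitat('no separator here', '='))    # -> ('no separator here', '')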
    -
    str2hex(...)
    str2hex(text)

    -Return text converted to a string consisting of two byte
    -HEX values.
    -
    suffix(...)
    suffix(text,suffixes,start=0,stop=len(text)[,translate])

    -Looks at text[start:stop] and returns the first matching
    -suffix out of the tuple of strings given in suffixes.
    -If no suffix is found to be matching, None is returned.
    -The optional 256 char translate string is used to translate
    -the text prior to comparing it with the given suffixes.
    -
    tag(...)
    tag(text,tagtable,sliceleft=0,sliceright=len(text),taglist=[],context=None) 
    -Produce a tag list for a string, given a tag-table
    -- returns a tuple (success, taglist, nextindex)
    -- if taglist == None, then no taglist is created
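    A minimal example, using the tag-table constants documented elsewhere in this package
    (AllIn and the a2z character string):

        from simpleparse.stt.TextTools.TextTools import tag, AllIn, a2z

        # One-entry tag table: report a run of lowercase letters as 'word'.
        table = (('word', AllIn, a2z),)
        print(tag('hello world', table))
        # -> (1, [('word', 0, 5, None)], 5)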
    -
    upper(...)
    upper(text)

    -Return text converted to upper case.
    -

    - - - - - -
     
    -Data
           A2Z = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    -A2Z_charset = <Character Set object for 'A-Z'>
    -A2Z_set = '\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -AllIn = 11
    -AllInCharSet = 41
    -AllInSet = 31
    -AllNotIn = 12
    -AppendMatch = 2048
    -AppendTagobj = 1024
    -AppendToTagobj = 512
    -BOYERMOORE = 0
    -Break = 0
    -Call = 201
    -CallArg = 202
    -CallTag = 256
    -EOF = 101
    -FASTSEARCH = 1
    -Fail = 100
    -HAVE_UNICODE = 1
    -Here = 1
    -Is = 13
    -IsIn = 14
    -IsInCharSet = 42
    -IsInSet = 32
    -IsNot = 15
    -IsNotIn = 15
    -Jump = 100
    -JumpTarget = 104
    -LookAhead = 4096
    -Loop = 205
    -LoopControl = 206
    -MatchFail = -1000000
    -MatchOk = 1000000
    -Move = 103
    -NoWord = 211
    -Reset = -1
    -SPGenerator = <simpleparse.generator.Generator instance>
    -Skip = 102
    -SubTable = 207
    -SubTableInList = 208
    -TRIVIAL = 2
    -Table = 203
    -TableInList = 204
    -ThisTable = 999
    -To = 0
    -ToBOF = 0
    -ToEOF = -1
    -Umlaute = '\xc4\xd6\xdc'
    -Umlaute_charset = <Character Set object for '\xc4\xd6\xdc'>
    -Word = 21
    -WordEnd = 23
    -WordStart = 22
    -_seq_children = FirstOfGroup( - children = [ - Name( - ...oken', - ), - ], - terminalValue = 0, -)
    -a2z = 'abcdefghijklmnopqrstuvwxyz'
    -a2z_charset = <Character Set object for 'a-z'>
    -a2z_set = '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -alpha = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
    -alpha_charset = <Character Set object for 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'>
    -alpha_set = '\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -alphanumeric = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'
    -alphanumeric_charset = <Character Set object for 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'>
    -alphanumeric_set = '\x00\x00\x00\x00\x00\x00\xff\x03\xfe\xff\xff\x07\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -any = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./...\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'
    -any_charset = <Character Set object for '\x00-\xff'>
    -any_set = '\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff'
    -characterrange = Name( - terminalValue = 1, - value = 'range', -)
    -declaration = 'declarationset := declaration+\ndeclaration...,[0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F]\n'
    -element_token = Name( - terminalValue = 0, - value = 'element_token', -)
    -formfeed = '\x0c'
    -formfeed_charset = <Character Set object for '\x0c'>
    -german_alpha = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\xe4\xf6\xfc\xdf\xc4\xd6\xdc'
    -german_alpha_charset = <Character Set object for 'ABCDEFGHIJKLMNOPQRSTU...hijklmnopqrstuvwxyz\xe4\xf6\xfc\xdf\xc4\xd6\xdc'>
    -german_alpha_set = '\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00@\x90\x10\x00@\x10'
    -group = Name( - terminalValue = 0, - value = 'group', -)
    -id2cmd = {-1000000: 'MatchFail', -1: 'ToEOF', 0: 'Fail/Jump', 1: 'Here', 11: 'AllIn', 12: 'AllNotIn', 13: 'Is', 14: 'IsIn', 15: 'IsNotIn', 21: 'Word', ...}
    -literal = Name( - terminalValue = 1, - value = 'literal', -)
    -name = Name( - terminalValue = 1, - value = 'name', -)
    -newline = '\r\n'
    -newline_charset = <Character Set object for '\r\n'>
    -newline_set = '\x00$\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -nonwhitespace_charset = <Character Set object for '^ \t\x0b\r\n\x0c'>
    -nonwhitespace_set = '\xff\xc1\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff'
    -number = '0123456789'
    -number_charset = <Character Set object for '0-9'>
    -number_set = '\x00\x00\x00\x00\x00\x00\xff\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -sFindWord = 213
    -sWordEnd = 212
    -sWordStart = 211
    -tagtable_cache = {(46912536021760, 0): <String Tag Table object>, (46912540134840, 0): <String Tag Table object>, (46912541410080, 0): <String Tag Table object>, (46912541454848, 0): <String Tag Table object>, (46912541455136, 0): <String Tag Table object>, (46912541455208, 0): <String Tag Table object>, (46912541489264, 0): <String Tag Table object>, (46912541566016, 0): <String Tag Table object>, (46912543903688, 0): <String Tag Table object>, (46912543908136, 0): <String Tag Table object>, ...}
    -to_lower = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./...\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'
    -to_upper = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./...\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'
    -umlaute = '\xe4\xf6\xfc\xdf'
    -umlaute_charset = <Character Set object for '\xe4\xf6\xfc\xdf'>
    -white = ' \t\x0b'
    -white_charset = <Character Set object for ' \t\x0b'>
    -white_set = '\x00\x02\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -whitespace = Name( - report = 0, - terminalValue = 1, - value = 'ts', -)
    -whitespace_charset = <Character Set object for ' \t\x0b\r\n\x0c'>
    -whitespace_set = '\x00&\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    - \ No newline at end of file
diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.stt.html simpleparse-2.2.0/doc/pydoc/simpleparse.stt.html
--- simpleparse-2.1.0a1/doc/pydoc/simpleparse.stt.html 2006-02-19 01:05:11.000000000 +0000
+++ simpleparse-2.2.0/doc/pydoc/simpleparse.stt.html 1970-01-01 00:00:00.000000000 +0000
@@ -1,41 +0,0 @@
    -Python: package simpleparse.stt
     
    - 
    simpleparse.stt
    index
    /home/mcfletch/pylive/simpleparse/stt/__init__.py
    -

    SimpleParse's built-in version of the mxTextTools text-processing engine

    -Copyright (c) 1998-2000, Marc-Andre Lemburg; mailto:mal@lemburg.com
    -Copyright (c) 2000-2003, eGenix.com Software GmbH; mailto:info@egenix.com
    -Copyright (c) 2003-2006, Mike Fletcher; mailto:mcfletch@vrplumber.com

    -See the documentation for further information on copyrights,
    -or contact the author. All Rights Reserved.

    -IMPORTANT:
    -The subpackages included in the mx Extension series may have
    -different license terms imposed on them. Be sure to read the
    -documentation of each subpackage *before* using them.

    -

    - - - - - -
     
    -Package Contents
           TextTools (package) -- mxTextTools - A tools package for fast text processing.
    __init__ -- SimpleParse's built-in version of the mxTextTools text-processing engine

    - - - - - -
     
    -Data
           __copyright__ = 'Copyright (c) 1998-2000, Marc-Andre Lemburg; mai...\n or contact the author. All Rights Reserved.\n'
    -__path__ = ['/home/mcfletch/pylive/simpleparse/stt']
    - \ No newline at end of file
diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.stt.__init__.html simpleparse-2.2.0/doc/pydoc/simpleparse.stt.__init__.html
--- simpleparse-2.1.0a1/doc/pydoc/simpleparse.stt.__init__.html 2006-02-19 01:05:14.000000000 +0000
+++ simpleparse-2.2.0/doc/pydoc/simpleparse.stt.__init__.html 1970-01-01 00:00:00.000000000 +0000
@@ -1,33 +0,0 @@
    -Python: module simpleparse.stt.__init__
     
    - 
    simpleparse.stt.__init__
    index
    /home/mcfletch/pylive/simpleparse/stt/__init__.py
    -

    SimpleParse's built-in version of the mxTextTools text-processing engine

    -Copyright (c) 1998-2000, Marc-Andre Lemburg; mailto:mal@lemburg.com
    -Copyright (c) 2000-2003, eGenix.com Software GmbH; mailto:info@egenix.com
    -Copyright (c) 2003-2006, Mike Fletcher; mailto:mcfletch@vrplumber.com

    -See the documentation for further information on copyrights,
    -or contact the author. All Rights Reserved.

    -IMPORTANT:
    -The subpackages included in the mx Extension series may have
    -different license terms imposed on them. Be sure to read the
    -documentation of each subpackage *before* using them.

    -

    - - - - - -
     
    -Data
           __copyright__ = 'Copyright (c) 1998-2000, Marc-Andre Lemburg; mai...\n or contact the author. All Rights Reserved.\n'
    - \ No newline at end of file
diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.stt.TextTools.Constants.html simpleparse-2.2.0/doc/pydoc/simpleparse.stt.TextTools.Constants.html
--- simpleparse-2.1.0a1/doc/pydoc/simpleparse.stt.TextTools.Constants.html 2006-02-19 01:05:14.000000000 +0000
+++ simpleparse-2.2.0/doc/pydoc/simpleparse.stt.TextTools.Constants.html 1970-01-01 00:00:00.000000000 +0000
@@ -1,28 +0,0 @@
    -Python: package simpleparse.stt.TextTools.Constants
     
    - 
    simpleparse.stt.TextTools.Constants
    index
    /home/mcfletch/pylive/simpleparse/stt/TextTools/Constants/__init__.py
    -

    -

    - - - - - -
     
    -Package Contents
           Sets -- Constants for sets (of characters)
    TagTables -- Constants for writing tag tables
    __init__

    - - - - - -
     
    -Data
           __path__ = ['/home/mcfletch/pylive/simpleparse/stt/TextTools/Constants']
    - \ No newline at end of file
diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.stt.TextTools.Constants.__init__.html simpleparse-2.2.0/doc/pydoc/simpleparse.stt.TextTools.Constants.__init__.html
--- simpleparse-2.1.0a1/doc/pydoc/simpleparse.stt.TextTools.Constants.__init__.html 2006-02-19 01:05:17.000000000 +0000
+++ simpleparse-2.2.0/doc/pydoc/simpleparse.stt.TextTools.Constants.__init__.html 1970-01-01 00:00:00.000000000 +0000
@@ -1,14 +0,0 @@
    -Python: module simpleparse.stt.TextTools.Constants.__init__
     
    - 
    simpleparse.stt.TextTools.Constants.__init__
    index
    /home/mcfletch/pylive/simpleparse/stt/TextTools/Constants/__init__.py
    -

    - \ No newline at end of file
diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.stt.TextTools.Constants.Sets.html simpleparse-2.2.0/doc/pydoc/simpleparse.stt.TextTools.Constants.Sets.html
--- simpleparse-2.1.0a1/doc/pydoc/simpleparse.stt.TextTools.Constants.Sets.html 2006-02-19 01:05:17.000000000 +0000
+++ simpleparse-2.2.0/doc/pydoc/simpleparse.stt.TextTools.Constants.Sets.html 1970-01-01 00:00:00.000000000 +0000
@@ -1,63 +0,0 @@
    -Python: module simpleparse.stt.TextTools.Constants.Sets
     
    - 
    simpleparse.stt.TextTools.Constants.Sets
    index
    /home/mcfletch/pylive/simpleparse/stt/TextTools/Constants/Sets.py
    -

    Constants for sets (of characters)

    -Copyright (c) 2000, Marc-Andre Lemburg; mailto:mal@lemburg.com
    -Copyright (c) 2000-2003, eGenix.com Software GmbH; mailto:info@egenix.com
    -See the documentation for further information on copyrights,
    -or contact the author. All Rights Reserved.

    -

    - - - - - -
     
    -Data
           A2Z = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    -A2Z_charset = <Character Set object for 'A-Z'>
    -A2Z_set = '\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -Umlaute = '\xc4\xd6\xdc'
    -Umlaute_charset = <Character Set object for '\xc4\xd6\xdc'>
    -a2z = 'abcdefghijklmnopqrstuvwxyz'
    -a2z_charset = <Character Set object for 'a-z'>
    -a2z_set = '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -alpha = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
    -alpha_charset = <Character Set object for 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'>
    -alpha_set = '\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -alphanumeric = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'
    -alphanumeric_charset = <Character Set object for 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'>
    -alphanumeric_set = '\x00\x00\x00\x00\x00\x00\xff\x03\xfe\xff\xff\x07\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -any = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./...\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'
    -any_charset = <Character Set object for '\x00-\xff'>
    -any_set = '\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff'
    -formfeed = '\x0c'
    -formfeed_charset = <Character Set object for '\x0c'>
    -german_alpha = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\xe4\xf6\xfc\xdf\xc4\xd6\xdc'
    -german_alpha_charset = <Character Set object for 'ABCDEFGHIJKLMNOPQRSTU...hijklmnopqrstuvwxyz\xe4\xf6\xfc\xdf\xc4\xd6\xdc'>
    -german_alpha_set = '\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00@\x90\x10\x00@\x10'
    -newline = '\r\n'
    -newline_charset = <Character Set object for '\r\n'>
    -newline_set = '\x00$\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -nonwhitespace_charset = <Character Set object for '^ \t\x0b\r\n\x0c'>
    -nonwhitespace_set = '\xff\xc1\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff'
    -number = '0123456789'
    -number_charset = <Character Set object for '0-9'>
    -number_set = '\x00\x00\x00\x00\x00\x00\xff\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -umlaute = '\xe4\xf6\xfc\xdf'
    -umlaute_charset = <Character Set object for '\xe4\xf6\xfc\xdf'>
    -white = ' \t\x0b'
    -white_charset = <Character Set object for ' \t\x0b'>
    -white_set = '\x00\x02\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -whitespace = ' \t\x0b\r\n\x0c'
    -whitespace_charset = <Character Set object for ' \t\x0b\r\n\x0c'>
    -whitespace_set = '\x00&\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    - \ No newline at end of file
diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.stt.TextTools.Constants.TagTables.html simpleparse-2.2.0/doc/pydoc/simpleparse.stt.TextTools.Constants.TagTables.html
--- simpleparse-2.1.0a1/doc/pydoc/simpleparse.stt.TextTools.Constants.TagTables.html 2006-02-19 01:05:17.000000000 +0000
+++ simpleparse-2.2.0/doc/pydoc/simpleparse.stt.TextTools.Constants.TagTables.html 1970-01-01 00:00:00.000000000 +0000
@@ -1,82 +0,0 @@
    -Python: module simpleparse.stt.TextTools.Constants.TagTables
     
    - 
    simpleparse.stt.TextTools.Constants.TagTables
    index
    /home/mcfletch/pylive/simpleparse/stt/TextTools/Constants/TagTables.py
    -

    Constants for writing tag tables

    -These are defined in mxte.h and imported here via the C extension.
    -See the documentation for details about the various constants.

    -Copyright (c) 2000, Marc-Andre Lemburg; mailto:mal@lemburg.com
    -Copyright (c) 2000-2003, eGenix.com Software GmbH; mailto:info@egenix.com
    -See the documentation for further information on copyrights,
    -or contact the author. All Rights Reserved.

    -

    - - - - - -
     
    -Functions
           
    _module_init()
    -

    - - - - - -
     
    -Data
           AllIn = 11
    -AllInCharSet = 41
    -AllInSet = 31
    -AllNotIn = 12
    -AppendMatch = 2048
    -AppendTagobj = 1024
    -AppendToTagobj = 512
    -Break = 0
    -Call = 201
    -CallArg = 202
    -CallTag = 256
    -EOF = 101
    -Fail = 100
    -Here = 1
    -Is = 13
    -IsIn = 14
    -IsInCharSet = 42
    -IsInSet = 32
    -IsNot = 15
    -IsNotIn = 15
    -Jump = 100
    -JumpTarget = 104
    -LookAhead = 4096
    -Loop = 205
    -LoopControl = 206
    -MatchFail = -1000000
    -MatchOk = 1000000
    -Move = 103
    -NoWord = 211
    -Reset = -1
    -Skip = 102
    -SubTable = 207
    -SubTableInList = 208
    -Table = 203
    -TableInList = 204
    -ThisTable = 999
    -To = 0
    -ToBOF = 0
    -ToEOF = -1
    -Word = 21
    -WordEnd = 23
    -WordStart = 22
    -id2cmd = {-1000000: 'MatchFail', -1: 'ToEOF', 0: 'Fail/Jump', 1: 'Here', 11: 'AllIn', 12: 'AllNotIn', 13: 'Is', 14: 'IsIn', 15: 'IsNotIn', 21: 'Word', ...}
    -sFindWord = 213
    -sWordEnd = 212
    -sWordStart = 211
    - \ No newline at end of file
diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.stt.TextTools.html simpleparse-2.2.0/doc/pydoc/simpleparse.stt.TextTools.html
--- simpleparse-2.1.0a1/doc/pydoc/simpleparse.stt.TextTools.html 2006-02-19 01:05:11.000000000 +0000
+++ simpleparse-2.2.0/doc/pydoc/simpleparse.stt.TextTools.html 1970-01-01 00:00:00.000000000 +0000
@@ -1,268 +0,0 @@
    -Python: package simpleparse.stt.TextTools
     
    - 
    simpleparse.stt.TextTools (version 2.1.0)
    index
    /home/mcfletch/pylive/simpleparse/stt/TextTools/__init__.py
    -

    mxTextTools - A tools package for fast text processing.

    -Copyright (c) 2000, Marc-Andre Lemburg; mailto:mal@lemburg.com
    -Copyright (c) 2000-2003, eGenix.com Software GmbH; mailto:info@egenix.com
    -Copyright (c) 2003-2006, Mike Fletcher; mailto:mcfletch@vrplumber.com
    -See the documentation for further information on copyrights,
    -or contact the author. All Rights Reserved.

    -

    - - - - - -
     
    -Package Contents
           Constants (package)
    TextTools -- mxTextTools - A tools package for fast text processing.
    __init__ -- mxTextTools - A tools package for fast text processing.
    mxTextTools (package) -- mxTextTools -- Tools for fast text processing. Version 2.1.0

    - - - - - -
     
    -Functions
           
    BMS = TextSearch(...)
    TextSearch(match[,translate=None,algorithm=default_algorithm])

    -Create a substring search object for the string match;
    -translate is an optional translate-string like the one used
    -in the module re.
    -
    CharSet(...)
    CharSet(definition)

    -Create a character set matching object from the string
    -
    FS = TextSearch(...)
    TextSearch(match[,translate=None,algorithm=default_algorithm])

    -Create a substring search object for the string match;
    -translate is an optional translate-string like the one used
    -in the module re.
    -
    FSType = TextSearch(...)
    TextSearch(match[,translate=None,algorithm=default_algorithm])

    -Create a substring search object for the string match;
    -translate is an optional translate-string like the one used
    -in the module re.
    -
    TagTable(...)
    TagTable(definition[,cachable=1])
    -
    TextSearch(...)
    TextSearch(match[,translate=None,algorithm=default_algorithm])

    -Create a substring search object for the string match;
    -translate is an optional translate-string like the one used
    -in the module re.
    -
    UnicodeTagTable(...)
    TagTable(definition[,cachable=1])
    -
    _BMS(match, translate)
    # Needed for backward compatibility:
    -
    _CS(definition)
    # Shortcuts for pickle (reduces the pickle's length)
    -
    _FS(match, translate)
    -
    _TS(match, translate, algorithm)
    -
    _TT(definition)
    -
    charsplit(...)
    charsplit(text,char,start=0,stop=len(text))

    -Split text[start:stop] into substrings at char and
    -return the result as list of strings.
    -
    cmp(...)
    cmp(a,b)

    -Compare two valid taglist tuples w/r to their slice
    -position; this is useful for sorting joinlists.
    -
    hex2str(...)
    hex2str(text)

    -Return text interpreted as two byte HEX values converted
    -to a string.
    -
    isascii(...)
    isascii(text,start=0,stop=len(text))

    -Return 1/0 depending on whether text only contains ASCII
    -characters.
    -
    join(...)
    join(joinlist,sep='',start=0,stop=len(joinlist))

    -Copy snippets from different strings together producing a
    -new string
    -The first argument must be a list of tuples or strings;
    -tuples must be of the form (string,l,r[,...]) and turn out
    -as string[l:r]
    -NOTE: the syntax used for negative slices is different
    -than the Python standard: -1 corresponds to the first
    -character *after* the string, e.g. ('Example',0,-1) gives
    -'Example' and not 'Exampl', like in Python
    -sep is an optional separator string, start and stop
    -define the slice of joinlist that is taken into account.
    -
    joinlist(...)
    joinlist(text,list,start=0,stop=len(text))

    -Takes a list of tuples (replacement,l,r,...) and produces
    -a taglist suitable for join() which creates a copy
    -of text where every slice [l:r] is replaced by the
    -given replacement
    -- the list must be sorted using cmp() as compare function
    -- it may not contain overlapping slices
    -- the slices may not contain negative indices
    -- if the taglist cannot contain overlapping slices, you can
    -  give this function the taglist produced by tag() directly
    -  (sorting is not needed, as the list will already be sorted)
    -- start and stop set the slice to work in, i.e. text[start:stop]
    -
    lower(...)
    lower(text)

    -Return text converted to lower case.
    -
    prefix(...)
    prefix(text,prefixes,start=0,stop=len(text)[,translate])

    -Looks at text[start:stop] and returns the first matching
    -prefix out of the tuple of strings given in prefixes.
    -If no matching prefix is found, None is returned.
    -The optional 256-char translate string is used to translate
    -the text prior to comparing it with the given prefixes.
    -
    set(...)
    set(string,logic=1)

    -Returns a character set for string: a bit encoded version
    -of the characters occurring in string.
    -- logic can be set to 0 if all characters *not* in string
    -  should go into the set
    -
    setfind(...)
    setfind(text,set,start=0,stop=len(text))

    -Find the first occurrence of any character from set in
    -text[start:stop]
    - set must be a string obtained with set()
    -DEPRECATED: use CharSet().search() instead.
    -
    setsplit(...)
    setsplit(text,set,start=0,stop=len(text))

    -Split text[start:stop] into substrings using set,
    -omitting the splitting parts and empty substrings.
    -set must be a string obtained from set()
    -DEPRECATED: use CharSet().split() instead.
    -
    setsplitx(...)
    setsplitx(text,set,start=0,stop=len(text))

    -Split text[start:stop] into substrings using set, so
    -that every second entry consists only of characters in set.
    -set must be a string obtained with set()
    -DEPRECATED: use CharSet().splitx() instead.
    -
    setstrip(...)
    setstrip(text,set,start=0,stop=len(text),mode=0)

    -Strip all characters in text[start:stop] appearing in set.
    -mode indicates where to strip (<0: left; =0: left and right;
    ->0: right). set must be a string obtained with set()
    -DEPRECATED: use CharSet().strip() instead.
    -
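    The set()-based helpers above (deprecated in favour of CharSet objects) combine roughly as in this sketch; the result shown is assumed from the descriptions:

        from simpleparse.stt.TextTools.TextTools import set, setsplit
        separators = set(" \t,")                 # bit-encoded character set
        words = setsplit("a, b\tc", separators)
        # expected: ['a', 'b', 'c'] (separators and empty substrings omitted)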
    splitat(...)
    splitat(text,char,nth=1,start=0,stop=len(text))

    -Split text[start:stop] into two substrings at the nth
    -occurrence of char and return the result as a 2-tuple. If the
    -character is not found, the second string is empty. nth may
    -be negative: the search is then done from the right and the
    -first string is empty in case the character is not found.
    -
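    splitat() with a positive and a negative nth, as a sketch (the results are assumed from the description above):

        from simpleparse.stt.TextTools.TextTools import splitat
        splitat("key=value=more", "=")      # expected: ('key', 'value=more')
        splitat("key=value=more", "=", -1)  # expected: ('key=value', 'more')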
    str2hex(...)
    str2hex(text)

    -Return text converted to a string consisting of two byte
    -HEX values.
    -
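    str2hex() and hex2str() are inverses of each other, e.g. (a sketch; the exact case of the produced hex digits is not specified here):

        from simpleparse.stt.TextTools.TextTools import str2hex, hex2str
        str2hex('ABC')     # expected: '414243'
        hex2str('414243')  # expected: 'ABC'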
    suffix(...)
    suffix(text,suffixes,start=0,stop=len(text)[,translate])

    -Looks at text[start:stop] and returns the first matching
    -suffix out of the tuple of strings given in suffixes.
    -If no suffix is found to be matching, None is returned.
    -The optional 256 char translate string is used to translate
    -the text prior to comparing it with the given suffixes.
    -
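    A sketch of prefix()/suffix() usage (return values assumed from the descriptions above):

        from simpleparse.stt.TextTools.TextTools import prefix, suffix
        prefix("simpleparse-2.2.0.tar.gz", ("simpleparse", "mxTextTools"))  # expected: 'simpleparse'
        suffix("simpleparse-2.2.0.tar.gz", (".zip", ".tar.gz"))             # expected: '.tar.gz'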
    tag(...)
    tag(text,tagtable,sliceleft=0,sliceright=len(text),taglist=[],context=None) 
    -Produce a tag list for a string, given a tag-table
    -- returns a tuple (success, taglist, nextindex)
    -- if taglist == None, then no taglist is created
    -
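    A minimal tag() call with a one-entry tag table, as a sketch; the exact shape of the taglist entry is an assumption:

        from simpleparse.stt.TextTools.TextTools import tag, AllIn, a2z
        table = (("word", AllIn, a2z),)          # tag a run of lowercase letters
        success, taglist, nextindex = tag("hello world", table)
        # success == 1 and nextindex == 5 are expected; taglist should hold
        # one entry of the form ('word', 0, 5, None)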
    upper(...)
    upper(text)

    -Return text converted to upper case.
    -

    - - - - - -
     
    -Data
           A2Z = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    -A2Z_charset = <Character Set object for 'A-Z'>
    -A2Z_set = '\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -AllIn = 11
    -AllInCharSet = 41
    -AllInSet = 31
    -AllNotIn = 12
    -AppendMatch = 2048
    -AppendTagobj = 1024
    -AppendToTagobj = 512
    -BOYERMOORE = 0
    -Break = 0
    -Call = 201
    -CallArg = 202
    -CallTag = 256
    -EOF = 101
    -FASTSEARCH = 1
    -Fail = 100
    -Here = 1
    -Is = 13
    -IsIn = 14
    -IsInCharSet = 42
    -IsInSet = 32
    -IsNot = 15
    -IsNotIn = 15
    -Jump = 100
    -JumpTarget = 104
    -LookAhead = 4096
    -Loop = 205
    -LoopControl = 206
    -MatchFail = -1000000
    -MatchOk = 1000000
    -Move = 103
    -NoWord = 211
    -Reset = -1
    -Skip = 102
    -SubTable = 207
    -SubTableInList = 208
    -TRIVIAL = 2
    -Table = 203
    -TableInList = 204
    -ThisTable = 999
    -To = 0
    -ToBOF = 0
    -ToEOF = -1
    -Umlaute = '\xc4\xd6\xdc'
    -Umlaute_charset = <Character Set object for '\xc4\xd6\xdc'>
    -Word = 21
    -WordEnd = 23
    -WordStart = 22
    -__path__ = ['/home/mcfletch/pylive/simpleparse/stt/TextTools']
    -a2z = 'abcdefghijklmnopqrstuvwxyz'
    -a2z_charset = <Character Set object for 'a-z'>
    -a2z_set = '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -alpha = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
    -alpha_charset = <Character Set object for 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'>
    -alpha_set = '\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -alphanumeric = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'
    -alphanumeric_charset = <Character Set object for 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'>
    -alphanumeric_set = '\x00\x00\x00\x00\x00\x00\xff\x03\xfe\xff\xff\x07\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -any = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./...\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'
    -any_charset = <Character Set object for '\x00-\xff'>
    -any_set = '\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff'
    -formfeed = '\x0c'
    -formfeed_charset = <Character Set object for '\x0c'>
    -german_alpha = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\xe4\xf6\xfc\xdf\xc4\xd6\xdc'
    -german_alpha_charset = <Character Set object for 'ABCDEFGHIJKLMNOPQRSTU...hijklmnopqrstuvwxyz\xe4\xf6\xfc\xdf\xc4\xd6\xdc'>
    -german_alpha_set = '\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00@\x90\x10\x00@\x10'
    -id2cmd = {-1000000: 'MatchFail', -1: 'ToEOF', 0: 'Fail/Jump', 1: 'Here', 11: 'AllIn', 12: 'AllNotIn', 13: 'Is', 14: 'IsIn', 15: 'IsNotIn', 21: 'Word', ...}
    -newline = '\r\n'
    -newline_charset = <Character Set object for '\r\n'>
    -newline_set = '\x00$\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -nonwhitespace_charset = <Character Set object for '^ \t\x0b\r\n\x0c'>
    -nonwhitespace_set = '\xff\xc1\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff'
    -number = '0123456789'
    -number_charset = <Character Set object for '0-9'>
    -number_set = '\x00\x00\x00\x00\x00\x00\xff\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -sFindWord = 213
    -sWordEnd = 212
    -sWordStart = 211
    -tagtable_cache = {(46912536022048, 0): <String Tag Table object>, (46912536088816, 0): <String Tag Table object>, (46912539588096, 0): <String Tag Table object>, (46912539588432, 0): <String Tag Table object>, (46912539588992, 0): <String Tag Table object>, (46912539628944, 0): <String Tag Table object>, (46912540112080, 0): <String Tag Table object>, (46912540113360, 0): <String Tag Table object>, (46912540134528, 0): <String Tag Table object>, (46912540134632, 0): <String Tag Table object>, ...}
    -to_lower = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./...\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'
    -to_upper = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./...\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'
    -umlaute = '\xe4\xf6\xfc\xdf'
    -umlaute_charset = <Character Set object for '\xe4\xf6\xfc\xdf'>
    -white = ' \t\x0b'
    -white_charset = <Character Set object for ' \t\x0b'>
    -white_set = '\x00\x02\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -whitespace = ' \t\x0b\r\n\x0c'
    -whitespace_charset = <Character Set object for ' \t\x0b\r\n\x0c'>
    -whitespace_set = '\x00&\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    - \ No newline at end of file diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.stt.TextTools.__init__.html simpleparse-2.2.0/doc/pydoc/simpleparse.stt.TextTools.__init__.html --- simpleparse-2.1.0a1/doc/pydoc/simpleparse.stt.TextTools.__init__.html 2006-02-19 01:05:14.000000000 +0000 +++ simpleparse-2.2.0/doc/pydoc/simpleparse.stt.TextTools.__init__.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,271 +0,0 @@ - - -Python: module simpleparse.stt.TextTools.__init__ - - - - -
     
    - 
    simpleparse.stt.TextTools.__init__ (version 2.1.0)
    index
    /home/mcfletch/pylive/simpleparse/stt/TextTools/__init__.py
    -

    mxTextTools - A tools package for fast text processing.

    -Copyright (c) 2000, Marc-Andre Lemburg; mailto:mal@lemburg.com
    -Copyright (c) 2000-2003, eGenix.com Software GmbH; mailto:info@egenix.com
    -Copyright (c) 2003-2006, Mike Fletcher; mailto:mcfletch@vrplumber.com
    -See the documentation for further information on copyrights,
    -or contact the author. All Rights Reserved.

    -

    - - - - - -
     
    -Modules
           
    simpleparse.stt.TextTools.mxTextTools.mxTextTools
    -
    string
    -
    time
    -
    types
    -

    - - - - - -
     
    -Functions
           
    BMS = TextSearch(...)
    TextSearch(match[,translate=None,algorithm=default_algorithm])

    -Create a substring search object for the string match;
    -translate is an optional translate-string like the one used
    -in the module re.
    -
    CharSet(...)
    CharSet(definition)

    -Create a character set matching object from the string
    -
    FS = TextSearch(...)
    TextSearch(match[,translate=None,algorithm=default_algorithm])

    -Create a substring search object for the string match;
    -translate is an optional translate-string like the one used
    -in the module re.
    -
    FSType = TextSearch(...)
    TextSearch(match[,translate=None,algorithm=default_algorithm])

    -Create a substring search object for the string match;
    -translate is an optional translate-string like the one used
    -in the module re.
    -
    TagTable(...)
    TagTable(definition[,cachable=1])
    -
    TextSearch(...)
    TextSearch(match[,translate=None,algorithm=default_algorithm])

    -Create a substring search object for the string match;
    -translate is an optional translate-string like the one used
    -in the module re.
    -
    UnicodeTagTable(...)
    TagTable(definition[,cachable=1])
    -
    _BMS(match, translate)
    # Needed for backward compatibility:
    -
    _CS(definition)
    # Shortcuts for pickle (reduces the pickle's length)
    -
    _FS(match, translate)
    -
    _TS(match, translate, algorithm)
    -
    _TT(definition)
    -
    charsplit(...)
    charsplit(text,char,start=0,stop=len(text))

    -Split text[start:stop] into substrings at char and
    -return the result as list of strings.
    -
    cmp(...)
    cmp(a,b)

    -Compare two valid taglist tuples w/r to their slice
    -position; this is useful for sorting joinlists.
    -
    hex2str(...)
    hex2str(text)

    -Return text interpreted as two byte HEX values converted
    -to a string.
    -
    isascii(...)
    isascii(text,start=0,stop=len(text))

    -Return 1/0 depending on whether text only contains ASCII
    -characters.
    -
    join(...)
    join(joinlist,sep='',start=0,stop=len(joinlist))

    -Copy snippets from different strings together producing a
    -new string
    -The first argument must be a list of tuples or strings;
    -tuples must be of the form (string,l,r[,...]) and turn out
    -as string[l:r]
    -NOTE: the syntax used for negative slices is different
    -than the Python standard: -1 corresponds to the first
    -character *after* the string, e.g. ('Example',0,-1) gives
    -'Example' and not 'Exampl', like in Python
    -sep is an optional separator string, start and stop
    -define the slice of joinlist that is taken into account.
    -
    joinlist(...)
    joinlist(text,list,start=0,stop=len(text))

    -Takes a list of tuples (replacement,l,r,...) and produces
    -a taglist suitable for join() which creates a copy
    -of text where every slice [l:r] is replaced by the
    -given replacement
    -- the list must be sorted using cmp() as compare function
    -- it may not contain overlapping slices
    -- the slices may not contain negative indices
    -- if the taglist cannot contain overlapping slices, you can
    -  give this function the taglist produced by tag() directly
    -  (sorting is not needed, as the list will already be sorted)
    -- start and stop set the slice to work in, i.e. text[start:stop]
    -
    lower(...)
    lower(text)

    -Return text converted to lower case.
    -
    prefix(...)
    prefix(text,prefixes,start=0,stop=len(text)[,translate])

    -Looks at text[start:stop] and returns the first matching
    -prefix out of the tuple of strings given in prefixes.
    -If no matching prefix is found, None is returned.
    -The optional 256-char translate string is used to translate
    -the text prior to comparing it with the given prefixes.
    -
    set(...)
    set(string,logic=1)

    -Returns a character set for string: a bit encoded version
    -of the characters occurring in string.
    -- logic can be set to 0 if all characters *not* in string
    -  should go into the set
    -
    setfind(...)
    setfind(text,set,start=0,stop=len(text))

    -Find the first occurrence of any character from set in
    -text[start:stop]
    - set must be a string obtained with set()
    -DEPRECATED: use CharSet().search() instead.
    -
    setsplit(...)
    setsplit(text,set,start=0,stop=len(text))

    -Split text[start:stop] into substrings using set,
    -omitting the splitting parts and empty substrings.
    -set must be a string obtained from set()
    -DEPRECATED: use CharSet().split() instead.
    -
    setsplitx(...)
    setsplitx(text,set,start=0,stop=len(text))

    -Split text[start:stop] into substrings using set, so
    -that every second entry consists only of characters in set.
    -set must be a string obtained with set()
    -DEPRECATED: use CharSet().splitx() instead.
    -
    setstrip(...)
    setstrip(text,set,start=0,stop=len(text),mode=0)

    -Strip all characters in text[start:stop] appearing in set.
    -mode indicates where to strip (<0: left; =0: left and right;
    ->0: right). set must be a string obtained with set()
    -DEPRECATED: use CharSet().strip() instead.
    -
    splitat(...)
    splitat(text,char,nth=1,start=0,stop=len(text))

    -Split text[start:stop] into two substrings at the nth
    -occurrence of char and return the result as a 2-tuple. If the
    -character is not found, the second string is empty. nth may
    -be negative: the search is then done from the right and the
    -first string is empty in case the character is not found.
    -
    str2hex(...)
    str2hex(text)

    -Return text converted to a string consisting of two byte
    -HEX values.
    -
    suffix(...)
    suffix(text,suffixes,start=0,stop=len(text)[,translate])

    -Looks at text[start:stop] and returns the first matching
    -suffix out of the tuple of strings given in suffixes.
    -If no suffix is found to be matching, None is returned.
    -The optional 256 char translate string is used to translate
    -the text prior to comparing it with the given suffixes.
    -
    tag(...)
    tag(text,tagtable,sliceleft=0,sliceright=len(text),taglist=[],context=None) 
    -Produce a tag list for a string, given a tag-table
    -- returns a tuple (success, taglist, nextindex)
    -- if taglist == None, then no taglist is created
    -
    upper(...)
    upper(text)

    -Return text converted to upper case.
    -

    - - - - - -
     
    -Data
           A2Z = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    -A2Z_charset = <Character Set object for 'A-Z'>
    -A2Z_set = '\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -AllIn = 11
    -AllInCharSet = 41
    -AllInSet = 31
    -AllNotIn = 12
    -AppendMatch = 2048
    -AppendTagobj = 1024
    -AppendToTagobj = 512
    -BOYERMOORE = 0
    -Break = 0
    -Call = 201
    -CallArg = 202
    -CallTag = 256
    -EOF = 101
    -FASTSEARCH = 1
    -Fail = 100
    -Here = 1
    -Is = 13
    -IsIn = 14
    -IsInCharSet = 42
    -IsInSet = 32
    -IsNot = 15
    -IsNotIn = 15
    -Jump = 100
    -JumpTarget = 104
    -LookAhead = 4096
    -Loop = 205
    -LoopControl = 206
    -MatchFail = -1000000
    -MatchOk = 1000000
    -Move = 103
    -NoWord = 211
    -Reset = -1
    -Skip = 102
    -SubTable = 207
    -SubTableInList = 208
    -TRIVIAL = 2
    -Table = 203
    -TableInList = 204
    -ThisTable = 999
    -To = 0
    -ToBOF = 0
    -ToEOF = -1
    -Umlaute = '\xc4\xd6\xdc'
    -Umlaute_charset = <Character Set object for '\xc4\xd6\xdc'>
    -Word = 21
    -WordEnd = 23
    -WordStart = 22
    -a2z = 'abcdefghijklmnopqrstuvwxyz'
    -a2z_charset = <Character Set object for 'a-z'>
    -a2z_set = '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -alpha = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
    -alpha_charset = <Character Set object for 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'>
    -alpha_set = '\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -alphanumeric = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'
    -alphanumeric_charset = <Character Set object for 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'>
    -alphanumeric_set = '\x00\x00\x00\x00\x00\x00\xff\x03\xfe\xff\xff\x07\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -any = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./...\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'
    -any_charset = <Character Set object for '\x00-\xff'>
    -any_set = '\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff'
    -formfeed = '\x0c'
    -formfeed_charset = <Character Set object for '\x0c'>
    -german_alpha = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\xe4\xf6\xfc\xdf\xc4\xd6\xdc'
    -german_alpha_charset = <Character Set object for 'ABCDEFGHIJKLMNOPQRSTU...hijklmnopqrstuvwxyz\xe4\xf6\xfc\xdf\xc4\xd6\xdc'>
    -german_alpha_set = '\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00@\x90\x10\x00@\x10'
    -id2cmd = {-1000000: 'MatchFail', -1: 'ToEOF', 0: 'Fail/Jump', 1: 'Here', 11: 'AllIn', 12: 'AllNotIn', 13: 'Is', 14: 'IsIn', 15: 'IsNotIn', 21: 'Word', ...}
    -newline = '\r\n'
    -newline_charset = <Character Set object for '\r\n'>
    -newline_set = '\x00$\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -nonwhitespace_charset = <Character Set object for '^ \t\x0b\r\n\x0c'>
    -nonwhitespace_set = '\xff\xc1\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff'
    -number = '0123456789'
    -number_charset = <Character Set object for '0-9'>
    -number_set = '\x00\x00\x00\x00\x00\x00\xff\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -sFindWord = 213
    -sWordEnd = 212
    -sWordStart = 211
    -tagtable_cache = {(46912536021760, 0): <String Tag Table object>, (46912540134840, 0): <String Tag Table object>, (46912541410080, 0): <String Tag Table object>, (46912541454848, 0): <String Tag Table object>, (46912541455136, 0): <String Tag Table object>, (46912541455208, 0): <String Tag Table object>, (46912541489264, 0): <String Tag Table object>, (46912541566016, 0): <String Tag Table object>, (46912543903688, 0): <String Tag Table object>, (46912543908136, 0): <String Tag Table object>, ...}
    -to_lower = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./...\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'
    -to_upper = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./...\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'
    -umlaute = '\xe4\xf6\xfc\xdf'
    -umlaute_charset = <Character Set object for '\xe4\xf6\xfc\xdf'>
    -white = ' \t\x0b'
    -white_charset = <Character Set object for ' \t\x0b'>
    -white_set = '\x00\x02\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -whitespace = ' \t\x0b\r\n\x0c'
    -whitespace_charset = <Character Set object for ' \t\x0b\r\n\x0c'>
    -whitespace_set = '\x00&\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    - \ No newline at end of file diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.stt.TextTools.mxTextTools.html simpleparse-2.2.0/doc/pydoc/simpleparse.stt.TextTools.mxTextTools.html --- simpleparse-2.1.0a1/doc/pydoc/simpleparse.stt.TextTools.mxTextTools.html 2006-02-19 01:05:11.000000000 +0000 +++ simpleparse-2.2.0/doc/pydoc/simpleparse.stt.TextTools.mxTextTools.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,205 +0,0 @@ - - -Python: module simpleparse.stt.TextTools.mxTextTools.mxTextTools - - - - -
     
    - 
    simpleparse.stt.TextTools.mxTextTools.mxTextTools (version 2.1.0)
    index
    /home/mcfletch/pylive/simpleparse/stt/TextTools/mxTextTools/mxTextTools.so
    -

    mxTextTools -- Tools for fast text processing. Version 2.1.0

    -Copyright (c) 1997-2000, Marc-Andre Lemburg; mailto:mal@lemburg.com
    -Copyright (c) 2000-2002, eGenix.com Software GmbH; mailto:info@egenix.com

    -Copyright (c) 2003-2006, Mike Fletcher; mailto:mcfletch@vrplumber.com

    -                 All Rights Reserved

    -See the documentation for further information on copyrights,
    -or contact the author.

    -

    - - - - - -
     
    -Functions
           
    CharSet(...)
    CharSet(definition)

    -Create a character set matching object from the string
    -
    TagTable(...)
    TagTable(definition[,cachable=1])
    -
    TextSearch(...)
    TextSearch(match[,translate=None,algorithm=default_algorithm])

    -Create a substring search object for the string match;
    -translate is an optional translate-string like the one used
    -in the module re.
    -
    UnicodeTagTable(...)
    TagTable(definition[,cachable=1])
    -
    charsplit(...)
    charsplit(text,char,start=0,stop=len(text))

    -Split text[start:stop] into substrings at char and
    -return the result as list of strings.
    -
    cmp(...)
    cmp(a,b)

    -Compare two valid taglist tuples w/r to their slice
    -position; this is useful for sorting joinlists.
    -
    hex2str(...)
    hex2str(text)

    -Return text interpreted as two byte HEX values converted
    -to a string.
    -
    isascii(...)
    isascii(text,start=0,stop=len(text))

    -Return 1/0 depending on whether text only contains ASCII
    -characters.
    -
    join(...)
    join(joinlist,sep='',start=0,stop=len(joinlist))

    -Copy snippets from different strings together producing a
    -new string
    -The first argument must be a list of tuples or strings;
    -tuples must be of the form (string,l,r[,...]) and turn out
    -as string[l:r]
    -NOTE: the syntax used for negative slices is different
    -than the Python standard: -1 corresponds to the first
    -character *after* the string, e.g. ('Example',0,-1) gives
    -'Example' and not 'Exampl', like in Python
    -sep is an optional separator string, start and stop
    -define the slice of joinlist that is taken into account.
    -
    joinlist(...)
    joinlist(text,list,start=0,stop=len(text))

    -Takes a list of tuples (replacement,l,r,...) and produces
    -a taglist suitable for join() which creates a copy
    -of text where every slice [l:r] is replaced by the
    -given replacement
    -- the list must be sorted using cmp() as compare function
    -- it may not contain overlapping slices
    -- the slices may not contain negative indices
    -- if the taglist cannot contain overlapping slices, you can
    -  give this function the taglist produced by tag() directly
    -  (sorting is not needed, as the list will already be sorted)
    -- start and stop set the slice to work in, i.e. text[start:stop]
    -
    lower(...)
    lower(text)

    -Return text converted to lower case.
    -
    prefix(...)
    prefix(text,prefixes,start=0,stop=len(text)[,translate])

    -Looks at text[start:stop] and returns the first matching
    -prefix out of the tuple of strings given in prefixes.
    -If no matching prefix is found, None is returned.
    -The optional 256-char translate string is used to translate
    -the text prior to comparing it with the given prefixes.
    -
    set(...)
    set(string,logic=1)

    -Returns a character set for string: a bit encoded version
    -of the characters occurring in string.
    -- logic can be set to 0 if all characters *not* in string
    -  should go into the set
    -
    setfind(...)
    setfind(text,set,start=0,stop=len(text))

    -Find the first occurrence of any character from set in
    -text[start:stop]
    - set must be a string obtained with set()
    -DEPRECATED: use CharSet().search() instead.
    -
    setsplit(...)
    setsplit(text,set,start=0,stop=len(text))

    -Split text[start:stop] into substrings using set,
    -omitting the splitting parts and empty substrings.
    -set must be a string obtained from set()
    -DEPRECATED: use CharSet().split() instead.
    -
    setsplitx(...)
    setsplitx(text,set,start=0,stop=len(text))

    -Split text[start:stop] into substrings using set, so
    -that every second entry consists only of characters in set.
    -set must be a string obtained with set()
    -DEPRECATED: use CharSet().splitx() instead.
    -
    setstrip(...)
    setstrip(text,set,start=0,stop=len(text),mode=0)

    -Strip all characters in text[start:stop] appearing in set.
    -mode indicates where to strip (<0: left; =0: left and right;
    ->0: right). set must be a string obtained with set()
    -DEPRECATED: use CharSet().strip() instead.
    -
    splitat(...)
    splitat(text,char,nth=1,start=0,stop=len(text))

    -Split text[start:stop] into two substrings at the nth
    -occurrence of char and return the result as a 2-tuple. If the
    -character is not found, the second string is empty. nth may
    -be negative: the search is then done from the right and the
    -first string is empty in case the character is not found.
    -
    str2hex(...)
    str2hex(text)

    -Return text converted to a string consisting of two byte
    -HEX values.
    -
    suffix(...)
    suffix(text,suffixes,start=0,stop=len(text)[,translate])

    -Looks at text[start:stop] and returns the first matching
    -suffix out of the tuple of strings given in suffixes.
    -If no suffix is found to be matching, None is returned.
    -The optional 256 char translate string is used to translate
    -the text prior to comparing it with the given suffixes.
    -
    tag(...)
    tag(text,tagtable,sliceleft=0,sliceright=len(text),taglist=[],context=None) 
    -Produce a tag list for a string, given a tag-table
    -- returns a tuple (success, taglist, nextindex)
    -- if taglist == None, then no taglist is created
    -
    upper(...)
    upper(text)

    -Return text converted to upper case.
    -

    - - - - - -
     
    -Data
           BOYERMOORE = 0
    -FASTSEARCH = 1
    -TRIVIAL = 2
    -_const_AllIn = 11
    -_const_AllInCharSet = 41
    -_const_AllInSet = 31
    -_const_AllNotIn = 12
    -_const_AppendMatch = 2048
    -_const_AppendTagobj = 1024
    -_const_AppendToTagobj = 512
    -_const_Break = 0
    -_const_Call = 201
    -_const_CallArg = 202
    -_const_CallTag = 256
    -_const_EOF = 101
    -_const_Fail = 100
    -_const_Here = 1
    -_const_Is = 13
    -_const_IsIn = 14
    -_const_IsInCharSet = 42
    -_const_IsInSet = 32
    -_const_IsNot = 15
    -_const_IsNotIn = 15
    -_const_Jump = 100
    -_const_JumpTarget = 104
    -_const_LookAhead = 4096
    -_const_Loop = 205
    -_const_LoopControl = 206
    -_const_MatchFail = -1000000
    -_const_MatchOk = 1000000
    -_const_Move = 103
    -_const_NoWord = 211
    -_const_Reset = -1
    -_const_Skip = 102
    -_const_SubTable = 207
    -_const_SubTableInList = 208
    -_const_Table = 203
    -_const_TableInList = 204
    -_const_ThisTable = 999
    -_const_To = 0
    -_const_ToBOF = 0
    -_const_ToEOF = -1
    -_const_Word = 21
    -_const_WordEnd = 23
    -_const_WordStart = 22
    -_const_sFindWord = 213
    -_const_sWordEnd = 212
    -_const_sWordStart = 211
    -tagtable_cache = {(46912536022048, 0): <String Tag Table object>, (46912536088816, 0): <String Tag Table object>, (46912539588096, 0): <String Tag Table object>, (46912539588432, 0): <String Tag Table object>, (46912539588992, 0): <String Tag Table object>, (46912539628944, 0): <String Tag Table object>, (46912540112080, 0): <String Tag Table object>, (46912540113360, 0): <String Tag Table object>, (46912540134528, 0): <String Tag Table object>, (46912540134632, 0): <String Tag Table object>, ...}
    -to_lower = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./...\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'
    -to_upper = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./...\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'
    - \ No newline at end of file diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.stt.TextTools.TextTools.html simpleparse-2.2.0/doc/pydoc/simpleparse.stt.TextTools.TextTools.html --- simpleparse-2.1.0a1/doc/pydoc/simpleparse.stt.TextTools.TextTools.html 2006-02-19 01:05:14.000000000 +0000 +++ simpleparse-2.2.0/doc/pydoc/simpleparse.stt.TextTools.TextTools.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,421 +0,0 @@ - - -Python: module simpleparse.stt.TextTools.TextTools - - - - -
     
    - 
    simpleparse.stt.TextTools.TextTools (version 2.1.0)
    index
    /home/mcfletch/pylive/simpleparse/stt/TextTools/TextTools.py
    -

    mxTextTools - A tools package for fast text processing.

    -Copyright (c) 2000, Marc-Andre Lemburg; mailto:mal@lemburg.com
    -Copyright (c) 2000-2003, eGenix.com Software GmbH; mailto:info@egenix.com
    -See the documentation for further information on copyrights,
    -or contact the author. All Rights Reserved.

    -

    - - - - - -
     
    -Modules
           
    simpleparse.stt.TextTools.mxTextTools.mxTextTools
    -
    string
    -
    time
    -
    types
    -

    - - - - - -
     
    -Classes
           
    -
    _timer -
    -

    - - - - - - - -
     
    -class _timer
       timer class with a quite obvious interface
    -- .start() starts a fairly accurate CPU-time timer plus an
    -  absolute timer
    -- .stop() stops the timer and returns a tuple: the CPU-time in seconds
    -  and the absolute time elapsed since .start() was called
     
     Methods defined here:
    -
    __str__(self)
    - -
    abstime(self, clock=<built-in function clock>, time=<built-in function time>)
    - -
    start(self, clock=<built-in function clock>, time=<built-in function time>)
    - -
    stop(self, clock=<built-in function clock>, time=<built-in function time>)
    - -
    usertime(self, clock=<built-in function clock>, time=<built-in function time>)
    - -
    -Data and other attributes defined here:
    -
    atime = 0
    - -
    utime = 0
    - -
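    The timer is used along these lines (a sketch; _timer is a module-private helper, so importing it directly is an assumption for illustration only):

        from simpleparse.stt.TextTools.TextTools import _timer
        t = _timer()
        t.start()
        # ... code being measured ...
        cpu_seconds, wall_seconds = t.stop()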

    - - - - - -
     
    -Functions
           
    BMS = TextSearch(...)
    TextSearch(match[,translate=None,algorithm=default_algorithm])

    -Create a substring search object for the string match;
    -translate is an optional translate-string like the one used
    -in the module re.
    -
    CharSet(...)
    CharSet(definition)

    -Create a character set matching object from the string
    -
    FS = TextSearch(...)
    TextSearch(match[,translate=None,algorithm=default_algorithm])

    -Create a substring search object for the string match;
    -translate is an optional translate-string like the one used
    -in the module re.
    -
    FSType = TextSearch(...)
    TextSearch(match[,translate=None,algorithm=default_algorithm])

    -Create a substring search object for the string match;
    -translate is an optional translate-string like the one used
    -in the module re.
    -
    TagTable(...)
    TagTable(definition[,cachable=1])
    -
    TextSearch(...)
    TextSearch(match[,translate=None,algorithm=default_algorithm])

    -Create a substring search object for the string match;
    -translate is an optional translate-string like the one used
    -in the module re.
    -
    UnicodeTagTable(...)
    TagTable(definition[,cachable=1])
    -
    _bench(file='mxTextTools/mxTextTools.c')
    -
    _lookup_dict(l, index=0)
    # aid for matching from a list of words
    -
    _replace2(text, what, with, start=0, stop=None, join=<built-in function join>, joinlist=<built-in function joinlist>, tag=<built-in function tag>, TextSearchType=<type 'TextSearch'>, TextSearch=<built-in function TextSearch>)
    Analogous to string.replace; returns a string with all occurrences
    -of what in text[start:stop] replaced by with.

    -This version uses a one entry tag-table and a
    -Boyer-Moore-Search-object.  what can be a string or a
    -TextSearch search object.  It's faster than string.replace in
    -those cases where the what-string gets long and/or many
    -replacements are found; faster meaning from a few percent up to
    -many times as fast

    -start and stop define the slice of text to work in.  stop
    -defaults to len(text).
    -
    _replace3(text, what, with, join=<function join>, TextSearch=<built-in function TextSearch>, TextSearchType=<type 'TextSearch'>)
    -
    _replace4(text, what, with, join=<built-in function join>, joinlist=<built-in function joinlist>, tag=<built-in function tag>, TextSearch=<built-in function TextSearch>, TextSearchType=<type 'TextSearch'>)
    -
    _tagdict(text, dict, prefix, taglist)
    # helper for tagdict
    -
    charsplit(...)
    charsplit(text,char,start=0,stop=len(text))

    -Split text[start:stop] into substrings at char and
    -return the result as list of strings.
    -
    cmp(...)
    cmp(a,b)

    -Compare two valid taglist tuples w/r to their slice
    -position; this is useful for sorting joinlists.
    -
    collapse(text, separator=' ', join=<built-in function join>, charset=<Character Set object for '\r\n \t\x0b\r\n\x0c'>)
    Eliminates newline characters and compresses whitespace
    -characters into one space.

    -The result is a one line text string. Tim Peters will like
    -this function called with '-' separator ;-)
    -
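    For example (a sketch; the exact whitespace handling is assumed from the description above):

        from simpleparse.stt.TextTools.TextTools import collapse
        collapse("first line\n  second\tline")
        # expected: 'first line second line'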
    countlines(text, linecount_table=((None, 13, '\r', 1), (None, 13, '\n', 1), ('line', 41, <Character Set object for '^\r\n'>, 1, -2), (None, 101, 1, 1, 1000000), ('empty line', 102, 0, 0, -4)))
    Returns the number of lines in text.

    -Line ends are treated just like for splitlines() in a
    -portable way.
    -
    find(text, what, start=0, stop=None, SearchObject=<built-in function TextSearch>)
    A faster replacement for string.find().

    -Uses a search object for the task. Returns the position of the
    -first occurance of what in text[start:stop]. stop defaults to
    -len(text).  Returns -1 in case no occurance was found.
    -
    findall(text, what, start=0, stop=None, SearchObject=<built-in function TextSearch>)
    Find all occurrences of what in text.

    -Uses a search object for the task. Returns a list of slice
    -tuples (l,r) marking the all occurances in
    -text[start:stop]. stop defaults to len(text).  Returns an
    -empty list in case no occurance was found.
    -
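    A sketch of find()/findall() (results assumed from the descriptions above):

        from simpleparse.stt.TextTools.TextTools import find, findall
        find("abcabc", "bc")     # expected: 1
        findall("abcabc", "bc")  # expected: [(1, 3), (4, 6)]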
    format_entry(table, i, TupleType=<type 'tuple'>)
    Returns a pp-formatted tag table entry as string
    -
    format_table(table, i=-1)
    Returns a pp-formatted version of the tag table as string
    -
    hex2str(...)
    hex2str(text)

    -Return text interpreted as two byte HEX values converted
    -to a string.
    -
    invset(chars)
    Return a set with all characters *except* the ones in chars.
    -
    is_whitespace(text, start=0, stop=None, charset=<Character Set object for '^ \t\x0b\r\n\x0c'>)
    Return 1 iff text[start:stop] only contains whitespace
    -characters (as defined in Constants/Sets.py), 0 otherwise.
    -
    isascii(...)
    isascii(text,start=0,stop=len(text))

    -Return 1/0 depending on whether text only contains ASCII
    -characters.
    -
    join(...)
    join(joinlist,sep='',start=0,stop=len(joinlist))

    -Copy snippets from different strings together producing a
    -new string
    -The first argument must be a list of tuples or strings;
    -tuples must be of the form (string,l,r[,...]) and turn out
    -as string[l:r]
    -NOTE: the syntax used for negative slices is different
    -than the Python standard: -1 corresponds to the first
    -character *after* the string, e.g. ('Example',0,-1) gives
    -'Example' and not 'Exampl', like in Python
    -sep is an optional separator string, start and stop
    -define the slice of joinlist that is taken into account.
    -
    joinlist(...)
    joinlist(text,list,start=0,stop=len(text))

    -Takes a list of tuples (replacement,l,r,...) and produces
    -a taglist suitable for join() which creates a copy
    -of text where every slice [l:r] is replaced by the
    -given replacement
    -- the list must be sorted using cmp() as compare function
    -- it may not contain overlapping slices
    -- the slices may not contain negative indices
    -- if the taglist cannot contain overlapping slices, you can
    -  give this function the taglist produced by tag() directly
    -  (sorting is not needed, as the list will already be sorted)
    -- start and stop set the slice to work in, i.e. text[start:stop]
    -
    linesplit = splitlines(text, tag=<built-in function tag>, linesplit_table=((None, 13, '\r', 1), (None, 13, '\n', 1), ('line', 2089, <Character Set object for '^\r\n'>, 1, -2), (None, 101, 1, 1, 1000000), ('empty line', 2150, 0, 0, -4)))
    Split text into a list of single lines.

    -        The following combinations are considered to be line-ends:
    -        '\r', '\n', '\r\n'; they may be used in any combination.  The
    -        line-end indicators are removed from the strings prior to
    -        adding them to the list.

    -        This function allows dealing with text files from Macs, PCs
    -        and Unix origins in a portable way.
    -
    lower(...)
    lower(text)

    -Return text converted to lower case.
    -
    multireplace(text, replacements, start=0, stop=None, join=<built-in function join>, joinlist=<built-in function joinlist>)
    Apply multiple replacement to a text at once.

    -replacements must be list of tuples (replacement, left,
    -right).  It is used to replace the slice text[left:right] with
    -the string replacement.

    -Note that the replacements do not affect one another.  Indices
    -always refer to the original text string.

    -Replacements must not overlap. Otherwise a ValueError is
    -raised.
    -
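    A sketch of multireplace(); indices refer to the original text and the slices must not overlap (the expected result is an assumption):

        from simpleparse.stt.TextTools.TextTools import multireplace
        multireplace("hello world", [("goodbye", 0, 5), ("moon", 6, 11)])
        # expected: 'goodbye moon'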
    normlist(jlist, StringType=<type 'str'>)
    Return a normalized joinlist.

    -All tuples in the joinlist are turned into real strings.  The
    -resulting list is an equivalent copy of the joinlist only
    -consisting of strings.
    -
    prefix(...)
    prefix(text,prefixes,start=0,stop=len(text)[,translate])

    -Looks at text[start:stop] and returns the first matching
    -prefix out of the tuple of strings given in prefixes.
    -If no matching prefix is found, None is returned.
    -The optional 256-char translate string is used to translate
    -the text prior to comparing it with the given prefixes.
    -
    print_joinlist(joins, indent=0, StringType=<type 'str'>)
    Print the joinlist joins using the given indent level
    -
    print_tags(text, tags, indent=0)
    Print the taglist tags for text using the given indent level
    -
    print_tagtable(table)
    Print the tag table
    -
    replace(text, what, with, start=0, stop=None, SearchObject=<built-in function TextSearch>, join=<built-in function join>, joinlist=<built-in function joinlist>, tag=<built-in function tag>, string_replace=<function replace>, type=<type 'type'>, StringType=<type 'str'>)
    A fast replacement for string.replace.

    -what can be given as string or search object.

    -This function is a good example for the AppendTagobj-flag usage
    -(the taglist can be used directly as joinlist).
    -
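    For example (a sketch; the expected result follows string.replace semantics):

        from simpleparse.stt.TextTools.TextTools import replace
        replace("one, two, two", "two", "three")
        # expected: 'one, three, three'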
    set(...)
    set(string,logic=1)

    -Returns a character set for string: a bit encoded version
    -of the characters occurring in string.
    -- logic can be set to 0 if all characters *not* in string
    -  should go into the set
    -
    setfind(...)
    setfind(text,set,start=0,stop=len(text))

    -Find the first occurrence of any character from set in
    -text[start:stop]
    - set must be a string obtained with set()
    -DEPRECATED: use CharSet().search() instead.
    -
    setsplit(...)
    setsplit(text,set,start=0,stop=len(text))

    -Split text[start:stop] into substrings using set,
    -omitting the splitting parts and empty substrings.
    -set must be a string obtained from set()
    -DEPRECATED: use CharSet().split() instead.
    -
    setsplitx(...)
    setsplitx(text,set,start=0,stop=len(text))

    -Split text[start:stop] into substrings using set, so
    -that every second entry consists only of characters in set.
    -set must be a string obtained with set()
    -DEPRECATED: use CharSet().splitx() instead.
    -
    setstrip(...)
    setstrip(text,set,start=0,stop=len(text),mode=0)

    -Strip all characters in text[start:stop] appearing in set.
    -mode indicates where to strip (<0: left; =0: left and right;
    ->0: right). set must be a string obtained with set()
    -DEPRECATED: use CharSet().strip() instead.
    -
    split(text, sep, start=0, stop=None, translate=None, SearchObject=<built-in function TextSearch>)
    A faster replacement for string.split().

    -Uses a search object for the task. Returns the result of
    -cutting the text[start:stop] string into snippets at every sep
    -occurance in form of a list of substrings. translate is passed
    -to the search object as translation string if given.

    -XXX Undocumented.
    -
    splitat(...)
    splitat(text,char,nth=1,start=0,stop=len(text))

    -Split text[start:stop] into two substrings at the nth
    -occurrence of char and return the result as a 2-tuple. If the
    -character is not found, the second string is empty. nth may
    -be negative: the search is then done from the right and the
    -first string is empty in case the character is not found.
    -
    splitlines(text, tag=<built-in function tag>, linesplit_table=((None, 13, '\r', 1), (None, 13, '\n', 1), ('line', 2089, <Character Set object for '^\r\n'>, 1, -2), (None, 101, 1, 1, 1000000), ('empty line', 2150, 0, 0, -4)))
    Split text into a list of single lines.

    -        The following combinations are considered to be line-ends:
    -        '\r', '\n', '\r\n'; they may be used in any combination.  The
    -        line-end indicators are removed from the strings prior to
    -        adding them to the list.

    -        This function allows dealing with text files from Macs, PCs
    -        and Unix origins in a portable way.
    -
    splitwords(text, charset=<Character Set object for ' \t\x0b\r\n\x0c'>)
    Split text into a list of single words.

    -Words are separated by whitespace. The whitespace is stripped
    -before adding the words to the list.
    -
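    A sketch of the two splitters (expected results assumed from the descriptions above):

        from simpleparse.stt.TextTools.TextTools import splitlines, splitwords
        splitlines("a\r\nb\nc")    # expected: ['a', 'b', 'c']
        splitwords(" a  b\tc\n")   # expected: ['a', 'b', 'c']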
    str2hex(...)
    str2hex(text)

    -Return text converted to a string consisting of two byte
    -HEX values.
    -
    suffix(...)
    suffix(text,suffixes,start=0,stop=len(text)[,translate])

    -Looks at text[start:stop] and returns the first matching
    -suffix out of the tuple of strings given in suffixes.
    -If no suffix is found to be matching, None is returned.
    -The optional 256 char translate string is used to translate
    -the text prior to comparing it with the given suffixes.
    -
    tag(...)
    tag(text,tagtable,sliceleft=0,sliceright=len(text),taglist=[],context=None) 
    -Produce a tag list for a string, given a tag-table
    -- returns a tuple (success, taglist, nextindex)
    -- if taglist == None, then no taglist is created
    -
    tagdict(text, *args)
    Tag a text just like the function tag() and then convert
    -its output into a dictionary where the tagobjects reference
    -their respective strings

    -This function emulates the interface of tag().  In contrast to
    -tag() this function *does* make copies of the found strings,
    -though.

    -Returns a tuple (rc,tagdict,next) with the same meaning of rc
    -and next as tag(); tagdict is the new dictionary or None in
    -case rc is 0.
    -
    upper(...)
    upper(text)

    -Return text converted to upper case.
    -
    word_in_list(l)
    Creates a lookup table that matches the words in l
    -

    - - - - - -
     
    -Data
           A2Z = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    -A2Z_charset = <Character Set object for 'A-Z'>
    -A2Z_set = '\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -AllIn = 11
    -AllInCharSet = 41
    -AllInSet = 31
    -AllNotIn = 12
    -AppendMatch = 2048
    -AppendTagobj = 1024
    -AppendToTagobj = 512
    -BOYERMOORE = 0
    -Break = 0
    -Call = 201
    -CallArg = 202
    -CallTag = 256
    -EOF = 101
    -FASTSEARCH = 1
    -Fail = 100
    -Here = 1
    -Is = 13
    -IsIn = 14
    -IsInCharSet = 42
    -IsInSet = 32
    -IsNot = 15
    -IsNotIn = 15
    -Jump = 100
    -JumpTarget = 104
    -LookAhead = 4096
    -Loop = 205
    -LoopControl = 206
    -MatchFail = -1000000
    -MatchOk = 1000000
    -Move = 103
    -NoWord = 211
    -Reset = -1
    -Skip = 102
    -SubTable = 207
    -SubTableInList = 208
    -TRIVIAL = 2
    -Table = 203
    -TableInList = 204
    -ThisTable = 999
    -To = 0
    -ToBOF = 0
    -ToEOF = -1
    -Umlaute = '\xc4\xd6\xdc'
    -Umlaute_charset = <Character Set object for '\xc4\xd6\xdc'>
    -Word = 21
    -WordEnd = 23
    -WordStart = 22
    -_linecount_table = ((None, 13, '\r', 1), (None, 13, '\n', 1), ('line', 41, <Character Set object for '^\r\n'>, 1, -2), (None, 101, 1, 1, 1000000), ('empty line', 102, 0, 0, -4))
    -_linesplit_table = ((None, 13, '\r', 1), (None, 13, '\n', 1), ('line', 2089, <Character Set object for '^\r\n'>, 1, -2), (None, 101, 1, 1, 1000000), ('empty line', 2150, 0, 0, -4))
    -_wordsplit_table = ((None, 41, <Character Set object for ' \t\x0b\r\n\x0c'>, 1), ('word', 2089, <Character Set object for '^ \t\x0b\r\n\x0c'>, 1, -1), (None, 101, 1, 1, 1000000))
    -a2z = 'abcdefghijklmnopqrstuvwxyz'
    -a2z_charset = <Character Set object for 'a-z'>
    -a2z_set = '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -alpha = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
    -alpha_charset = <Character Set object for 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'>
    -alpha_set = '\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -alphanumeric = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'
    -alphanumeric_charset = <Character Set object for 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'>
    -alphanumeric_set = '\x00\x00\x00\x00\x00\x00\xff\x03\xfe\xff\xff\x07\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -any = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./...\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'
    -any_charset = <Character Set object for '\x00-\xff'>
    -any_set = '\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff'
    -formfeed = '\x0c'
    -formfeed_charset = <Character Set object for '\x0c'>
    -german_alpha = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\xe4\xf6\xfc\xdf\xc4\xd6\xdc'
    -german_alpha_charset = <Character Set object for 'ABCDEFGHIJKLMNOPQRSTU...hijklmnopqrstuvwxyz\xe4\xf6\xfc\xdf\xc4\xd6\xdc'>
    -german_alpha_set = '\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00@\x90\x10\x00@\x10'
    -id2cmd = {-1000000: 'MatchFail', -1: 'ToEOF', 0: 'Fail/Jump', 1: 'Here', 11: 'AllIn', 12: 'AllNotIn', 13: 'Is', 14: 'IsIn', 15: 'IsNotIn', 21: 'Word', ...}
    -newline = '\r\n'
    -newline_charset = <Character Set object for '\r\n'>
    -newline_set = '\x00$\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -nonwhitespace_charset = <Character Set object for '^ \t\x0b\r\n\x0c'>
    -nonwhitespace_set = '\xff\xc1\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff'
    -number = '0123456789'
    -number_charset = <Character Set object for '0-9'>
    -number_set = '\x00\x00\x00\x00\x00\x00\xff\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -sFindWord = 213
    -sWordEnd = 212
    -sWordStart = 211
    -tagtable_cache = {(46912536021760, 0): <String Tag Table object>, (46912540134840, 0): <String Tag Table object>, (46912541410080, 0): <String Tag Table object>, (46912541454848, 0): <String Tag Table object>, (46912541455136, 0): <String Tag Table object>, (46912541455208, 0): <String Tag Table object>, (46912541489264, 0): <String Tag Table object>, (46912541566016, 0): <String Tag Table object>, (46912543903688, 0): <String Tag Table object>, (46912543908136, 0): <String Tag Table object>, ...}
    -to_lower = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./...\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'
    -to_upper = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./...\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'
    -umlaute = '\xe4\xf6\xfc\xdf'
    -umlaute_charset = <Character Set object for '\xe4\xf6\xfc\xdf'>
    -white = ' \t\x0b'
    -white_charset = <Character Set object for ' \t\x0b'>
    -white_set = '\x00\x02\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -whitespace = ' \t\x0b\r\n\x0c'
    -whitespace_charset = <Character Set object for ' \t\x0b\r\n\x0c'>
    -whitespace_set = '\x00&\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    - \ No newline at end of file diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.tests.genericvalues.html simpleparse-2.2.0/doc/pydoc/simpleparse.tests.genericvalues.html --- simpleparse-2.1.0a1/doc/pydoc/simpleparse.tests.genericvalues.html 2006-02-19 01:05:15.000000000 +0000 +++ simpleparse-2.2.0/doc/pydoc/simpleparse.tests.genericvalues.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,63 +0,0 @@ - - -Python: module simpleparse.tests.genericvalues - - - - -
     
    - 
    simpleparse.tests.genericvalues
    index
    /home/mcfletch/pylive/simpleparse/tests/genericvalues.py
    -

    Values to match result-trees even when implementations change

    -These values match the "logical" values of the result-trees
    -as they apply to SimpleParse's usage, rather than the particular
    -concrete results returned by the engine.  So, for instance, you
    -can say "returns no children" (NullResults) for result-tuples or
    -"whatever failure position" for failure return values.

    -

    - - - - - -
     
    -Classes
           
    -
    _AnyInt -
    _NullResults -
    -

    - - - - - -
     
    -class _AnyInt
        Methods defined here:
    -
    __cmp__(self, other)
    - -
    __repr__(self)
    - -

    - - - - - -
     
    -class _NullResults
        Methods defined here:
    -
    __cmp__(self, other)
    - -
    __repr__(self)
    - -

    - - - - - -
     
    -Data
           AnyInt = <Any Integer>
    -NullResult = <Null Children>
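    These values are used when writing expected result-trees in the tests, roughly as in this sketch (the surrounding tuple shape is an assumption based on the module docstring above):

        from simpleparse.tests.genericvalues import AnyInt, NullResult
        # 'failed, no children, at whatever position the engine reports':
        expected = (0, NullResult, AnyInt)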
- 
\ No newline at end of file
diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.tests.html simpleparse-2.2.0/doc/pydoc/simpleparse.tests.html
--- simpleparse-2.1.0a1/doc/pydoc/simpleparse.tests.html 2006-02-19 01:05:12.000000000 +0000
+++ simpleparse-2.2.0/doc/pydoc/simpleparse.tests.html 1970-01-01 00:00:00.000000000 +0000
@@ -1,34 +0,0 @@
[34 lines of pydoc-generated HTML removed: package simpleparse.tests
(/home/mcfletch/pylive/simpleparse/tests/__init__.py), "Package of test
scripts, is a package to make setup.py include it :)".  Run test.py from the
command line to run all the primary tests; it takes a while (10 seconds or so)
even on a properly configured system, and a system with an old copy of
mx.TextTools might hit an infinite loop or a C stack-recursion error.  Package
contents: __init__, genericvalues, mx_flag, mx_high, mx_low, mx_recursive,
mx_special, mx_test, test, test_backup_on_subtable_failure,
test_common_chartypes, test_common_comments, test_common_iso_date,
test_common_numbers, test_common_strings, test_deep_nesting, test_erroronfail,
test_grammarparser, test_objectgenerator, test_optimisation, test_printers,
test_printers_garbage, test_simpleparsegrammar, test_xml; data
__path__ = ['/home/mcfletch/pylive/simpleparse/tests'].]
- 
\ No newline at end of file
diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.tests.__init__.html simpleparse-2.2.0/doc/pydoc/simpleparse.tests.__init__.html
--- simpleparse-2.1.0a1/doc/pydoc/simpleparse.tests.__init__.html 2006-02-19 01:05:15.000000000 +0000
+++ simpleparse-2.2.0/doc/pydoc/simpleparse.tests.__init__.html 1970-01-01 00:00:00.000000000 +0000
@@ -1,20 +0,0 @@
[20 lines of pydoc-generated HTML removed: module simpleparse.tests.__init__
(/home/mcfletch/pylive/simpleparse/tests/__init__.py), carrying the same
package docstring as simpleparse.tests above.]
- 
\ No newline at end of file
diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.tests.mx_flag.html simpleparse-2.2.0/doc/pydoc/simpleparse.tests.mx_flag.html
--- simpleparse-2.1.0a1/doc/pydoc/simpleparse.tests.mx_flag.html 2006-02-19 01:05:15.000000000 +0000
+++ simpleparse-2.2.0/doc/pydoc/simpleparse.tests.mx_flag.html 1970-01-01 00:00:00.000000000 +0000
@@ -1,441 +0,0 @@
[441 lines of pydoc-generated HTML removed: module simpleparse.tests.mx_flag
(/home/mcfletch/pylive/simpleparse/tests/mx_flag.py).  Documents class
MXFlagTests(TestCase), "Test Flags for returning/calling different functions
on success", with doBasicTest plus tests for AppendMatch, AppendTagobj,
AppendToTagobj (plain and with a simple list), CallTag (plain and with a class
instance) and LookAhead.  The remainder of the page is the standard pydoc
listing of inherited unittest.TestCase methods, the mx.TextTools module-level
functions (TextSearch, CharSet, TagTable, tag, join, joinlist, set, setfind,
setsplit, setsplitx, setstrip, splitat, charsplit, prefix, suffix, hex2str,
str2hex, isascii, cmp, lower, upper) and the tag-table command constants and
character-set data (A2Z, a2z, alpha, alphanumeric, whitespace, etc.).]
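For orientation, a minimal sketch (illustration only, not part of the diff) of
the tag-table interface these low-level tests exercise, using the tag()
signature and the AllIn command and character-set constants documented above;
the (tagobj, command, argument) entry layout is assumed from the bundled
mx.TextTools conventions:

    from simpleparse.stt.TextTools.TextTools import tag, AllIn, a2z, white

    table = (
        ('word',  AllIn, a2z),    # tag a run of lowercase letters as 'word'
        ('space', AllIn, white),  # then a run of spaces/tabs as 'space'
    )
    success, taglist, next_pos = tag('hello world', table)
    # success == 1, next_pos == 6; taglist entries are (tagobj, left, right,
    # subtags) tuples such as ('word', 0, 5, None).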
- 
\ No newline at end of file
diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.tests.mx_high.html simpleparse-2.2.0/doc/pydoc/simpleparse.tests.mx_high.html
--- simpleparse-2.1.0a1/doc/pydoc/simpleparse.tests.mx_high.html 2006-02-19 01:05:15.000000000 +0000
+++ simpleparse-2.2.0/doc/pydoc/simpleparse.tests.mx_high.html 1970-01-01 00:00:00.000000000 +0000
@@ -1,441 +0,0 @@
[441 lines of pydoc-generated HTML removed: module simpleparse.tests.mx_high
(/home/mcfletch/pylive/simpleparse/tests/mx_high.py), "Low-level matching
tests for mx.TextTools".  Documents class MXHighTests(TestCase) with
doBasicTest plus tests for the call-to-match Call and CallArg commands
(including Call with an object instance) and for sFindWord, sWordStart and
sWordEnd (simple and ignore-fail variants), followed by the same inherited
unittest.TestCase methods, mx.TextTools function reference and module data
constants as mx_flag above.]
    - \ No newline at end of file diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.tests.mx_low.html simpleparse-2.2.0/doc/pydoc/simpleparse.tests.mx_low.html --- simpleparse-2.1.0a1/doc/pydoc/simpleparse.tests.mx_low.html 2006-02-19 01:05:15.000000000 +0000 +++ simpleparse-2.2.0/doc/pydoc/simpleparse.tests.mx_low.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,485 +0,0 @@ - - -Python: module simpleparse.tests.mx_low - - - - -
     
    - 
    simpleparse.tests.mx_low
    index
    /home/mcfletch/pylive/simpleparse/tests/mx_low.py
    -

    Low-level matching tests for mx.TextTools

    -

    - - - - - -
     
    -Modules
           
    simpleparse.stt.TextTools.Constants
    -simpleparse.stt.TextTools
    -
    simpleparse.stt.TextTools.mxTextTools.mxTextTools
    -pprint
    -
    string
    -time
    -
    types
    -unittest
    -

    - - - - - -
     
    -Classes
           
    -
    TestCase(object) -
    -
    -
    MXLowTests -
    -
    -
    -

    - - - - - -
     
    -class MXLowTests(TestCase)
        
    Method resolution order:
    -
    MXLowTests
    -
    TestCase
    -
    object
    -
    -
    -Methods defined here:
    -
    doBasicTest(self, table, testvalue, expected, startPosition=0)
    - -
    testAllIn1(self)
    Test simple AllIn command
    - -
    testAllIn2(self)
    Test simple AllIn command ignore fail
    - -
    testAllIn3(self)
    Test simple AllIn command w 2 items
    - -
    testAllIn4(self)
    Test simple AllIn command fail on second

    -This should truncate the results list back to [], as well
    -as returning 0 as length.  This is broken under
    -mx.TextTools 2.1.0b1!
    - -
    testAllIn5(self)
    Test simple AllIn command with None tagobj
    - -
    testAllInCharSet1(self)
    Test simple AllInSet command w/ CharSet object
    - -
    testAllInCharSet2(self)
    Test simple AllInSet command ignore fail
    - -
    testAllInSet1(self)
    Test simple AllInSet command
    - -
    testAllInSet2(self)
    Test simple AllInSet command ignore fail
    - -
    testAllNotIn1(self)
    Test simple AllNotIn command
    - -
    testAllNotIn2(self)
    Test simple AllNotIn command ignore fail
    - -
    testAllNotIn3(self)
    Test simple AllNotIn command w 2 items
    - -
    testIs1(self)
    Test simple Is command
    - -
    testIs2(self)
    Test simple Is command ignore fail
    - -
    testIsIn1(self)
    Test simple IsIn command
    - -
    testIsIn2(self)
    Test simple IsIn command ignore fail
    - -
    testIsInCharSet1(self)
    Test simple IsInCharSet command
    - -
    testIsInCharSet2(self)
    Test simple IsInCharSet command ignore fail
    - -
    testIsInSet1(self)
    Test simple IsInSet command
    - -
    testIsInSet2(self)
    Test simple IsInSet command ignore fail
    - -
    testIsNotIn1(self)
    Test simple IsNotIn command
    - -
    testIsNotIn2(self)
    Test simple IsNotIn command ignore fail
    - -
    testWord1(self)
    Test simple Word command
    - -
    testWord2(self)
    Test simple Word command ignore fail
    - -
    testWordEnd1(self)
    Test simple WordEnd command
    - -
    testWordEnd2(self)
    Test simple WordEnd command ignore fail
    - -
    testWordStart1(self)
    Test simple WordStart command
    - -
    testWordStart2(self)
    Test simple WordStart command ignore fail
    - -
    -Methods inherited from TestCase:
    -
    __call__(self, *args, **kwds)
    - -
    __init__(self, methodName='runTest')
    Create an instance of the class that will use the named test
    -method when executed. Raises a ValueError if the instance does
    -not have a method with the specified name.
    - -
    __repr__(self)
    - -
    __str__(self)
    - -
    assertAlmostEqual = failUnlessAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are unequal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most signficant digit).
    - -
    assertAlmostEquals = failUnlessAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are unequal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most signficant digit).
    - -
    assertEqual = failUnlessEqual(self, first, second, msg=None)
    Fail if the two objects are unequal as determined by the '=='
    -operator.
    - -
    assertEquals = failUnlessEqual(self, first, second, msg=None)
    Fail if the two objects are unequal as determined by the '=='
    -operator.
    - -
    assertFalse = failIf(self, expr, msg=None)
    Fail the test if the expression is true.
    - -
    assertNotAlmostEqual = failIfAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are equal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most signficant digit).
    - -
    assertNotAlmostEquals = failIfAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are equal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most signficant digit).
    - -
    assertNotEqual = failIfEqual(self, first, second, msg=None)
    Fail if the two objects are equal as determined by the '=='
    -operator.
    - -
    assertNotEquals = failIfEqual(self, first, second, msg=None)
    Fail if the two objects are equal as determined by the '=='
    -operator.
    - -
    assertRaises = failUnlessRaises(self, excClass, callableObj, *args, **kwargs)
    Fail unless an exception of class excClass is thrown
    -by callableObj when invoked with arguments args and keyword
    -arguments kwargs. If a different type of exception is
    -thrown, it will not be caught, and the test case will be
    -deemed to have suffered an error, exactly as for an
    -unexpected exception.
    - -
    assertTrue = failUnless(self, expr, msg=None)
    Fail the test unless the expression is true.
    - -
    assert_ = failUnless(self, expr, msg=None)
    Fail the test unless the expression is true.
    - -
    countTestCases(self)
    - -
    debug(self)
    Run the test without collecting errors in a TestResult
    - -
    defaultTestResult(self)
    - -
    fail(self, msg=None)
    Fail immediately, with the given message.
    - -
    failIf(self, expr, msg=None)
    Fail the test if the expression is true.
    - -
    failIfAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are equal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most signficant digit).
    - -
    failIfEqual(self, first, second, msg=None)
    Fail if the two objects are equal as determined by the '=='
    -operator.
    - -
    failUnless(self, expr, msg=None)
    Fail the test unless the expression is true.
    - -
    failUnlessAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are unequal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most signficant digit).
    - -
    failUnlessEqual(self, first, second, msg=None)
    Fail if the two objects are unequal as determined by the '=='
    -operator.
    - -
    failUnlessRaises(self, excClass, callableObj, *args, **kwargs)
    Fail unless an exception of class excClass is thrown
    -by callableObj when invoked with arguments args and keyword
    -arguments kwargs. If a different type of exception is
    -thrown, it will not be caught, and the test case will be
    -deemed to have suffered an error, exactly as for an
    -unexpected exception.
    - -
    id(self)
    - -
    run(self, result=None)
    - -
    setUp(self)
    Hook method for setting up the test fixture before exercising it.
    - -
    shortDescription(self)
    Returns a one-line description of the test, or None if no
    -description has been provided.

    -The default implementation of this method returns the first line of
    -the specified test method's docstring.
    - -
    tearDown(self)
    Hook method for deconstructing the test fixture after testing it.
    - -
    -Data and other attributes inherited from TestCase:
    -
    __dict__ = <dictproxy object>
    dictionary for instance variables (if defined)
    - -
    __weakref__ = <attribute '__weakref__' of 'TestCase' objects>
    list of weak references to the object (if defined)
    - -
    failureException = <class exceptions.AssertionError>
    Assertion failed.
    - -

    - - - - - -
     
    -Functions
           
    BMS = TextSearch(...)
    TextSearch(match[,translate=None,algorithm=default_algorithm])

    -Create a substring search object for the string match;
    -translate is an optional translate-string like the one used
    -in the module re.
    -
    CharSet(...)
    CharSet(definition)

    -Create a character set matching object from the string
    -
    FS = TextSearch(...)
    TextSearch(match[,translate=None,algorithm=default_algorithm])

    -Create a substring search object for the string match;
    -translate is an optional translate-string like the one used
    -in the module re.
    -
    FSType = TextSearch(...)
    TextSearch(match[,translate=None,algorithm=default_algorithm])

    -Create a substring search object for the string match;
    -translate is an optional translate-string like the one used
    -in the module re.
    -
    TagTable(...)
    TagTable(definition[,cachable=1])
    -
    TextSearch(...)
    TextSearch(match[,translate=None,algorithm=default_algorithm])

    -Create a substring search object for the string match;
    -translate is an optional translate-string like the one used
    -in the module re.
    -
    UnicodeTagTable(...)
    TagTable(definition[,cachable=1])
    -
    charsplit(...)
    charsplit(text,char,start=0,stop=len(text))

    -Split text[start:stop] into substrings at char and
    -return the result as list of strings.
    -
    cmp(...)
    cmp(a,b)

    -Compare two valid taglist tuples w/r to their slice
    -position; this is useful for sorting joinlists.
    -
    getSuite()
    -
    hex2str(...)
    hex2str(text)

    -Return text interpreted as two byte HEX values converted
    -to a string.
    -
    isascii(...)
    isascii(text,start=0,stop=len(text))

    -Return 1/0 depending on whether text only contains ASCII
    -characters.
    -
    join(...)
    join(joinlist,sep='',start=0,stop=len(joinlist))

    -Copy snippets from different strings together producing a
    -new string
    -The first argument must be a list of tuples or strings;
    -tuples must be of the form (string,l,r[,...]) and turn out
    -as string[l:r]
    -NOTE: the syntax used for negative slices is different
    -than the Python standard: -1 corresponds to the first
    -character *after* the string, e.g. ('Example',0,-1) gives
    -'Example' and not 'Exampl', like in Python
    -sep is an optional separator string, start and stop
    -define the slice of joinlist that is taken into accont.
    -
    joinlist(...)
    joinlist(text,list,start=0,stop=len(text))

    -Takes a list of tuples (replacement,l,r,...) and produces
    -a taglist suitable for join() which creates a copy
    -of text where every slice [l:r] is replaced by the
    -given replacement
    -- the list must be sorted using cmp() as compare function
    -- it may not contain overlapping slices
    -- the slices may not contain negative indices
    -- if the taglist cannot contain overlapping slices, you can
    -  give this function the taglist produced by tag() directly
    -  (sorting is not needed, as the list will already be sorted)
    -- start and stop set the slice to work in, i.e. text[start:stop]
    -
    lower(...)
    lower(text)

    -Return text converted to lower case.
    -
    prefix(...)
    prefix(text,prefixes,start=0,stop=len(text)[,translate])

    -Looks at text[start:stop] and returns the first matching
    -prefix out of the tuple of strings given in prefixes.
    -If no prefix is found to be matching, None is returned.
    -The optional 256 char translate string is used to translate
    -the text prior to comparing it with the given suffixes.
    -
    set(...)
    set(string,logic=1)

    -Returns a character set for string: a bit encoded version
    -of the characters occurring in string.
    -- logic can be set to 0 if all characters *not* in string
    -  should go into the set
    -
    setfind(...)
    setfind(text,set,start=0,stop=len(text))

    -Find the first occurence of any character from set in
    -text[start:stop]
    - set must be a string obtained with set()
    -DEPRECATED: use CharSet().search() instead.
    -
    setsplit(...)
    setsplit(text,set,start=0,stop=len(text))

    -Split text[start:stop] into substrings using set,
    -omitting the splitting parts and empty substrings.
    -set must be a string obtained from set()
    -DEPRECATED: use CharSet().split() instead.
    -
    setsplitx(...)
    setsplitx(text,set,start=0,stop=len(text))

    -Split text[start:stop] into substrings using set, so
    -that every second entry consists only of characters in set.
    -set must be a string obtained with set()
    -DEPRECATED: use CharSet().splitx() instead.
    -
    setstrip(...)
    setstrip(text,set,start=0,stop=len(text),mode=0)

    -Strip all characters in text[start:stop] appearing in set.
    -mode indicates where to strip (<0: left; =0: left and right;
    ->0: right). set must be a string obtained with set()
    -DEPRECATED: use CharSet().strip() instead.
    -
    splitat(...)
    splitat(text,char,nth=1,start=0,stop=len(text))

    -Split text[start:stop] into two substrings at the nth
    -occurance of char and return the result as 2-tuple. If the
    -character is not found, the second string is empty. nth may
    -be negative: the search is then done from the right and the
    -first string is empty in case the character is not found.
    -
    str2hex(...)
    str2hex(text)

    -Return text converted to a string consisting of two byte
    -HEX values.
    -
    suffix(...)
    suffix(text,suffixes,start=0,stop=len(text)[,translate])

    -Looks at text[start:stop] and returns the first matching
    -suffix out of the tuple of strings given in suffixes.
    -If no suffix is found to be matching, None is returned.
    -The optional 256 char translate string is used to translate
    -the text prior to comparing it with the given suffixes.
    -
    tag(...)
    tag(text,tagtable,sliceleft=0,sliceright=len(text),taglist=[],context=None) 
    -Produce a tag list for a string, given a tag-table
    -- returns a tuple (success, taglist, nextindex)
    -- if taglist == None, then no taglist is created
    -
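tag() is the central entry point documented above, so a minimal one-entry tag table may help. This sketch is illustrative only: the import path is assumed from the TextTools module named elsewhere in these pages, the (tagobj, command, argument) entry layout is inferred from the table constants shown in the Data sections of these pages, and the commented result is the expected shape rather than captured output.

    from simpleparse.stt.TextTools import TextTools as TT

    # One-entry table: report the leading run of lowercase letters as "word".
    # AllIn and the a2z character string are listed in the Data section below.
    table = (("word", TT.AllIn, TT.a2z),)

    print(TT.tag("hello world", table))
    # expected shape: (1, [('word', 0, 5, None)], 5)
    #                 success flag, taglist of (tagobj, l, r, subtags), next index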
    upper(...)
    upper(text)

    -Return text converted to upper case.
    -

    - - - - - -
     
    -Data
           A2Z = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    -A2Z_charset = <Character Set object for 'A-Z'>
    -A2Z_set = '\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -AllIn = 11
    -AllInCharSet = 41
    -AllInSet = 31
    -AllNotIn = 12
    -AnyInt = <Any Integer>
    -AppendMatch = 2048
    -AppendTagobj = 1024
    -AppendToTagobj = 512
    -BOYERMOORE = 0
    -Break = 0
    -Call = 201
    -CallArg = 202
    -CallTag = 256
    -EOF = 101
    -FASTSEARCH = 1
    -Fail = 100
    -Here = 1
    -Is = 13
    -IsIn = 14
    -IsInCharSet = 42
    -IsInSet = 32
    -IsNot = 15
    -IsNotIn = 15
    -Jump = 100
    -JumpTarget = 104
    -LookAhead = 4096
    -Loop = 205
    -LoopControl = 206
    -MatchFail = -1000000
    -MatchOk = 1000000
    -Move = 103
    -NoWord = 211
    -NullResult = <Null Children>
    -Reset = -1
    -Skip = 102
    -SubTable = 207
    -SubTableInList = 208
    -TRIVIAL = 2
    -Table = 203
    -TableInList = 204
    -ThisTable = 999
    -To = 0
    -ToBOF = 0
    -ToEOF = -1
    -Umlaute = '\xc4\xd6\xdc'
    -Umlaute_charset = <Character Set object for '\xc4\xd6\xdc'>
    -Word = 21
    -WordEnd = 23
    -WordStart = 22
    -a2z = 'abcdefghijklmnopqrstuvwxyz'
    -a2z_charset = <Character Set object for 'a-z'>
    -a2z_set = '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -alpha = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
    -alpha_charset = <Character Set object for 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'>
    -alpha_set = '\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -alphanumeric = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'
    -alphanumeric_charset = <Character Set object for 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'>
    -alphanumeric_set = '\x00\x00\x00\x00\x00\x00\xff\x03\xfe\xff\xff\x07\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -any = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./...\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'
    -any_charset = <Character Set object for '\x00-\xff'>
    -any_set = '\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff'
    -formfeed = '\x0c'
    -formfeed_charset = <Character Set object for '\x0c'>
    -german_alpha = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\xe4\xf6\xfc\xdf\xc4\xd6\xdc'
    -german_alpha_charset = <Character Set object for 'ABCDEFGHIJKLMNOPQRSTU...hijklmnopqrstuvwxyz\xe4\xf6\xfc\xdf\xc4\xd6\xdc'>
    -german_alpha_set = '\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00@\x90\x10\x00@\x10'
    -id2cmd = {-1000000: 'MatchFail', -1: 'ToEOF', 0: 'Fail/Jump', 1: 'Here', 11: 'AllIn', 12: 'AllNotIn', 13: 'Is', 14: 'IsIn', 15: 'IsNotIn', 21: 'Word', ...}
    -mxVersion = ('2', '1', '0')
    -newline = '\r\n'
    -newline_charset = <Character Set object for '\r\n'>
    -newline_set = '\x00$\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -nonwhitespace_charset = <Character Set object for '^ \t\x0b\r\n\x0c'>
    -nonwhitespace_set = '\xff\xc1\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff'
    -number = '0123456789'
    -number_charset = <Character Set object for '0-9'>
    -number_set = '\x00\x00\x00\x00\x00\x00\xff\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -sFindWord = 213
    -sWordEnd = 212
    -sWordStart = 211
    -tagtable_cache = {(46912536021760, 0): <String Tag Table object>, (46912540134840, 0): <String Tag Table object>, (46912541410080, 0): <String Tag Table object>, (46912541454848, 0): <String Tag Table object>, (46912541455136, 0): <String Tag Table object>, (46912541455208, 0): <String Tag Table object>, (46912541489264, 0): <String Tag Table object>, (46912541566016, 0): <String Tag Table object>, (46912543903688, 0): <String Tag Table object>, (46912543908136, 0): <String Tag Table object>, ...}
    -to_lower = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./...\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'
    -to_upper = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./...\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'
    -umlaute = '\xe4\xf6\xfc\xdf'
    -umlaute_charset = <Character Set object for '\xe4\xf6\xfc\xdf'>
    -white = ' \t\x0b'
    -white_charset = <Character Set object for ' \t\x0b'>
    -white_set = '\x00\x02\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -whitespace = ' \t\x0b\r\n\x0c'
    -whitespace_charset = <Character Set object for ' \t\x0b\r\n\x0c'>
    -whitespace_set = '\x00&\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    - \ No newline at end of file diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.tests.mx_recursive.html simpleparse-2.2.0/doc/pydoc/simpleparse.tests.mx_recursive.html --- simpleparse-2.1.0a1/doc/pydoc/simpleparse.tests.mx_recursive.html 2006-02-19 01:05:15.000000000 +0000 +++ simpleparse-2.2.0/doc/pydoc/simpleparse.tests.mx_recursive.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,445 +0,0 @@ - - -Python: module simpleparse.tests.mx_recursive - - - - -
     
    - 
    simpleparse.tests.mx_recursive
    index
    /home/mcfletch/pylive/simpleparse/tests/mx_recursive.py
    -

    Low-level matching tests for mx.TextTools

    -

    - - - - - -
     
    -Modules
           
    simpleparse.stt.TextTools.Constants
    -simpleparse.stt.TextTools.TextTools
    -
    simpleparse.stt.TextTools.mxTextTools.mxTextTools
    -pprint
    -
    string
    -time
    -
    types
    -unittest
    -

    - - - - - -
     
    -Classes
           
    -
    TestCase(object) -
    -
    -
    MXRecursiveTests -
    -
    -
    -

    - - - - - -
     
    -class MXRecursiveTests(TestCase)
        
    Method resolution order:
    -
    MXRecursiveTests
    -
    TestCase
    -
    object
    -
    -
    -Methods defined here:
    -
    doBasicTest(self, table, testvalue, expected, startPosition=0)
    - -
    testAB(self)
    Test AB testing command
    - -
    testABCDEF(self)
    Test abcdef all together
    - -
    testCDEF(self)
    Test CDEF testing command
    - -
    testSubTable1(self)
    Test SubTable command
    - -
    testSubTable2(self)
    Test SubTable command with no reporting of st groups
    - -
    testSubTableInList1(self)
    Test SubTableInList command
    - -
    testSubTableInList2(self)
    Test SubTable command with no reporting of st groups
    - -
    testSubTableNotReturnRecursive(self)
    Test that SubTable calls don't return a recursive structure
    - -
    testTable1(self)
    Test Table command
    - -
    testTableInList1(self)
    Test TableInList command
    - -
    -Methods inherited from TestCase:
    -
    __call__(self, *args, **kwds)
    - -
    __init__(self, methodName='runTest')
    Create an instance of the class that will use the named test
    -method when executed. Raises a ValueError if the instance does
    -not have a method with the specified name.
    - -
    __repr__(self)
    - -
    __str__(self)
    - -
    assertAlmostEqual = failUnlessAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are unequal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most significant digit).
    - -
    assertAlmostEquals = failUnlessAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are unequal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most significant digit).
    - -
    assertEqual = failUnlessEqual(self, first, second, msg=None)
    Fail if the two objects are unequal as determined by the '=='
    -operator.
    - -
    assertEquals = failUnlessEqual(self, first, second, msg=None)
    Fail if the two objects are unequal as determined by the '=='
    -operator.
    - -
    assertFalse = failIf(self, expr, msg=None)
    Fail the test if the expression is true.
    - -
    assertNotAlmostEqual = failIfAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are equal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most significant digit).
    - -
    assertNotAlmostEquals = failIfAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are equal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most significant digit).
    - -
    assertNotEqual = failIfEqual(self, first, second, msg=None)
    Fail if the two objects are equal as determined by the '=='
    -operator.
    - -
    assertNotEquals = failIfEqual(self, first, second, msg=None)
    Fail if the two objects are equal as determined by the '=='
    -operator.
    - -
    assertRaises = failUnlessRaises(self, excClass, callableObj, *args, **kwargs)
    Fail unless an exception of class excClass is thrown
    -by callableObj when invoked with arguments args and keyword
    -arguments kwargs. If a different type of exception is
    -thrown, it will not be caught, and the test case will be
    -deemed to have suffered an error, exactly as for an
    -unexpected exception.
    - -
    assertTrue = failUnless(self, expr, msg=None)
    Fail the test unless the expression is true.
    - -
    assert_ = failUnless(self, expr, msg=None)
    Fail the test unless the expression is true.
    - -
    countTestCases(self)
    - -
    debug(self)
    Run the test without collecting errors in a TestResult
    - -
    defaultTestResult(self)
    - -
    fail(self, msg=None)
    Fail immediately, with the given message.
    - -
    failIf(self, expr, msg=None)
    Fail the test if the expression is true.
    - -
    failIfAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are equal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most significant digit).
    - -
    failIfEqual(self, first, second, msg=None)
    Fail if the two objects are equal as determined by the '=='
    -operator.
    - -
    failUnless(self, expr, msg=None)
    Fail the test unless the expression is true.
    - -
    failUnlessAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are unequal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most significant digit).
    - -
    failUnlessEqual(self, first, second, msg=None)
    Fail if the two objects are unequal as determined by the '=='
    -operator.
    - -
    failUnlessRaises(self, excClass, callableObj, *args, **kwargs)
    Fail unless an exception of class excClass is thrown
    -by callableObj when invoked with arguments args and keyword
    -arguments kwargs. If a different type of exception is
    -thrown, it will not be caught, and the test case will be
    -deemed to have suffered an error, exactly as for an
    -unexpected exception.
    - -
    id(self)
    - -
    run(self, result=None)
    - -
    setUp(self)
    Hook method for setting up the test fixture before exercising it.
    - -
    shortDescription(self)
    Returns a one-line description of the test, or None if no
    -description has been provided.

    -The default implementation of this method returns the first line of
    -the specified test method's docstring.
    - -
    tearDown(self)
    Hook method for deconstructing the test fixture after testing it.
    - -
    -Data and other attributes inherited from TestCase:
    -
    __dict__ = <dictproxy object>
    dictionary for instance variables (if defined)
    - -
    __weakref__ = <attribute '__weakref__' of 'TestCase' objects>
    list of weak references to the object (if defined)
    - -
    failureException = <class exceptions.AssertionError>
    Assertion failed.
    - -

    - - - - - -
     
    -Functions
           
    BMS = TextSearch(...)
    TextSearch(match[,translate=None,algorithm=default_algorithm])

    -Create a substring search object for the string match;
    -translate is an optional translate-string like the one used
    -in the module re.
    -
    CharSet(...)
    CharSet(definition)

    -Create a character set matching object from the string
    -
    FS = TextSearch(...)
    TextSearch(match[,translate=None,algorithm=default_algorithm])

    -Create a substring search object for the string match;
    -translate is an optional translate-string like the one used
    -in the module re.
    -
    FSType = TextSearch(...)
    TextSearch(match[,translate=None,algorithm=default_algorithm])

    -Create a substring search object for the string match;
    -translate is an optional translate-string like the one used
    -in the module re.
    -
    TagTable(...)
    TagTable(definition[,cachable=1])
    -
    TextSearch(...)
    TextSearch(match[,translate=None,algorithm=default_algorithm])

    -Create a substring search object for the string match;
    -translate is an optional translate-string like the one used
    -in the module re.
    -
    UnicodeTagTable(...)
    TagTable(definition[,cachable=1])
    -
    charsplit(...)
    charsplit(text,char,start=0,stop=len(text))

    -Split text[start:stop] into substrings at char and
    -return the result as list of strings.
    -
    cmp(...)
    cmp(a,b)

    -Compare two valid taglist tuples with respect to their slice
    -position; this is useful for sorting joinlists.
    -
    getSuite()
    -
    hex2str(...)
    hex2str(text)

    -Return text interpreted as two byte HEX values converted
    -to a string.
    -
    isascii(...)
    isascii(text,start=0,stop=len(text))

    -Return 1/0 depending on whether text only contains ASCII
    -characters.
    -
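charsplit(), isascii() and the str2hex()/hex2str() pair documented in this Functions block fit in a one-screen sketch. It is illustrative only; the import path is assumed from the Modules section of this page, and the expected values are assumptions, not captured output.

    from simpleparse.stt.TextTools import TextTools as TT

    print(TT.charsplit("a:b:c", ":"))    # expected: ['a', 'b', 'c']
    print(TT.isascii("plain ascii"))     # expected: 1
    print(TT.isascii("\xe4\xf6\xfc"))    # expected: 0 (Python 2 byte string, matching the module's era)

    encoded = TT.str2hex("AB")           # expected: '4142'
    print(TT.hex2str(encoded))           # expected: 'AB'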
    join(...)
    join(joinlist,sep='',start=0,stop=len(joinlist))

    -Copy snippets from different strings together producing a
    -new string
    -The first argument must be a list of tuples or strings;
    -tuples must be of the form (string,l,r[,...]) and turn out
    -as string[l:r]
    -NOTE: the syntax used for negative slices is different
    -than the Python standard: -1 corresponds to the first
    -character *after* the string, e.g. ('Example',0,-1) gives
    -'Example' and not 'Exampl', like in Python
    -sep is an optional separator string, start and stop
    -define the slice of joinlist that is taken into account.
    -
    joinlist(...)
    joinlist(text,list,start=0,stop=len(text))

    -Takes a list of tuples (replacement,l,r,...) and produces
    -a taglist suitable for join() which creates a copy
    -of text where every slice [l:r] is replaced by the
    -given replacement
    -- the list must be sorted using cmp() as compare function
    -- it may not contain overlapping slices
    -- the slices may not contain negative indices
    -- if the taglist cannot contain overlapping slices, you can
    -  give this function the taglist produced by tag() directly
    -  (sorting is not needed, as the list will already be sorted)
    -- start and stop set the slice to work in, i.e. text[start:stop]
    -
    lower(...)
    lower(text)

    -Return text converted to lower case.
    -
    prefix(...)
    prefix(text,prefixes,start=0,stop=len(text)[,translate])

    -Looks at text[start:stop] and returns the first matching
    -prefix out of the tuple of strings given in prefixes.
    -If no prefix is found to be matching, None is returned.
    -The optional 256 char translate string is used to translate
    -the text prior to comparing it with the given prefixes.
    -
    set(...)
    set(string,logic=1)

    -Returns a character set for string: a bit encoded version
    -of the characters occurring in string.
    -- logic can be set to 0 if all characters *not* in string
    -  should go into the set
    -
    setfind(...)
    setfind(text,set,start=0,stop=len(text))

    -Find the first occurrence of any character from set in
    -text[start:stop]
    - set must be a string obtained with set()
    -DEPRECATED: use CharSet().search() instead.
    -
    setsplit(...)
    setsplit(text,set,start=0,stop=len(text))

    -Split text[start:stop] into substrings using set,
    -omitting the splitting parts and empty substrings.
    -set must be a string obtained from set()
    -DEPRECATED: use CharSet().split() instead.
    -
    setsplitx(...)
    setsplitx(text,set,start=0,stop=len(text))

    -Split text[start:stop] into substrings using set, so
    -that every second entry consists only of characters in set.
    -set must be a string obtained with set()
    -DEPRECATED: use CharSet().splitx() instead.
    -
    setstrip(...)
    setstrip(text,set,start=0,stop=len(text),mode=0)

    -Strip all characters in text[start:stop] appearing in set.
    -mode indicates where to strip (<0: left; =0: left and right;
    ->0: right). set must be a string obtained with set()
    -DEPRECATED: use CharSet().strip() instead.
    -
    splitat(...)
    splitat(text,char,nth=1,start=0,stop=len(text))

    -Split text[start:stop] into two substrings at the nth
    -occurrence of char and return the result as a 2-tuple. If the
    -character is not found, the second string is empty. nth may
    -be negative: the search is then done from the right and the
    -first string is empty in case the character is not found.
    -
    str2hex(...)
    str2hex(text)

    -Return text converted to a string consisting of two byte
    -HEX values.
    -
    suffix(...)
    suffix(text,suffixes,start=0,stop=len(text)[,translate])

    -Looks at text[start:stop] and returns the first matching
    -suffix out of the tuple of strings given in suffixes.
    -If no suffix is found to be matching, None is returned.
    -The optional 256 char translate string is used to translate
    -the text prior to comparing it with the given suffixes.
    -
    tag(...)
    tag(text,tagtable,sliceleft=0,sliceright=len(text),taglist=[],context=None) 
    -Produce a tag list for a string, given a tag-table
    -- returns a tuple (success, taglist, nextindex)
    -- if taglist == None, then no taglist is created
    -
    upper(...)
    upper(text)

    -Return text converted to upper case.
    -

    - - - - - -
     
    -Data
           A2Z = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    -A2Z_charset = <Character Set object for 'A-Z'>
    -A2Z_set = '\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -AllIn = 11
    -AllInCharSet = 41
    -AllInSet = 31
    -AllNotIn = 12
    -AppendMatch = 2048
    -AppendTagobj = 1024
    -AppendToTagobj = 512
    -BOYERMOORE = 0
    -Break = 0
    -Call = 201
    -CallArg = 202
    -CallTag = 256
    -EOF = 101
    -FASTSEARCH = 1
    -Fail = 100
    -Here = 1
    -Is = 13
    -IsIn = 14
    -IsInCharSet = 42
    -IsInSet = 32
    -IsNot = 15
    -IsNotIn = 15
    -Jump = 100
    -JumpTarget = 104
    -LookAhead = 4096
    -Loop = 205
    -LoopControl = 206
    -MatchFail = -1000000
    -MatchOk = 1000000
    -Move = 103
    -NoWord = 211
    -Reset = -1
    -Skip = 102
    -SubTable = 207
    -SubTableInList = 208
    -TRIVIAL = 2
    -Table = 203
    -TableInList = 204
    -ThisTable = 999
    -To = 0
    -ToBOF = 0
    -ToEOF = -1
    -Umlaute = '\xc4\xd6\xdc'
    -Umlaute_charset = <Character Set object for '\xc4\xd6\xdc'>
    -Word = 21
    -WordEnd = 23
    -WordStart = 22
    -a2z = 'abcdefghijklmnopqrstuvwxyz'
    -a2z_charset = <Character Set object for 'a-z'>
    -a2z_set = '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -ab = (('ab', 21, 'ab', 0),)
    -alpha = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
    -alpha_charset = <Character Set object for 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'>
    -alpha_set = '\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -alphanumeric = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'
    -alphanumeric_charset = <Character Set object for 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'>
    -alphanumeric_set = '\x00\x00\x00\x00\x00\x00\xff\x03\xfe\xff\xff\x07\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -any = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./...\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'
    -any_charset = <Character Set object for '\x00-\xff'>
    -any_set = '\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff'
    -cdef = (('cd', 21, 'cd', 0), ('ef', 21, 'ef', 1, 1))
    -formfeed = '\x0c'
    -formfeed_charset = <Character Set object for '\x0c'>
    -german_alpha = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\xe4\xf6\xfc\xdf\xc4\xd6\xdc'
    -german_alpha_charset = <Character Set object for 'ABCDEFGHIJKLMNOPQRSTU...hijklmnopqrstuvwxyz\xe4\xf6\xfc\xdf\xc4\xd6\xdc'>
    -german_alpha_set = '\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00@\x90\x10\x00@\x10'
    -id2cmd = {-1000000: 'MatchFail', -1: 'ToEOF', 0: 'Fail/Jump', 1: 'Here', 11: 'AllIn', 12: 'AllNotIn', 13: 'Is', 14: 'IsIn', 15: 'IsNotIn', 21: 'Word', ...}
    -newline = '\r\n'
    -newline_charset = <Character Set object for '\r\n'>
    -newline_set = '\x00$\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -nonwhitespace_charset = <Character Set object for '^ \t\x0b\r\n\x0c'>
    -nonwhitespace_set = '\xff\xc1\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff'
    -number = '0123456789'
    -number_charset = <Character Set object for '0-9'>
    -number_set = '\x00\x00\x00\x00\x00\x00\xff\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -sFindWord = 213
    -sWordEnd = 212
    -sWordStart = 211
    -tableList = [(('ab', 21, 'ab', 0),), (('cd', 21, 'cd', 0), ('ef', 21, 'ef', 1, 1))]
    -tagtable_cache = {(46912536021760, 0): <String Tag Table object>, (46912540134840, 0): <String Tag Table object>, (46912541410080, 0): <String Tag Table object>, (46912541454848, 0): <String Tag Table object>, (46912541455136, 0): <String Tag Table object>, (46912541455208, 0): <String Tag Table object>, (46912541489264, 0): <String Tag Table object>, (46912541566016, 0): <String Tag Table object>, (46912543903688, 0): <String Tag Table object>, (46912543908136, 0): <String Tag Table object>, ...}
    -to_lower = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./...\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'
    -to_upper = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./...\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'
    -umlaute = '\xe4\xf6\xfc\xdf'
    -umlaute_charset = <Character Set object for '\xe4\xf6\xfc\xdf'>
    -white = ' \t\x0b'
    -white_charset = <Character Set object for ' \t\x0b'>
    -white_set = '\x00\x02\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -whitespace = ' \t\x0b\r\n\x0c'
    -whitespace_charset = <Character Set object for ' \t\x0b\r\n\x0c'>
    -whitespace_set = '\x00&\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    - \ No newline at end of file diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.tests.mx_special.html simpleparse-2.2.0/doc/pydoc/simpleparse.tests.mx_special.html --- simpleparse-2.1.0a1/doc/pydoc/simpleparse.tests.mx_special.html 2006-02-19 01:05:16.000000000 +0000 +++ simpleparse-2.2.0/doc/pydoc/simpleparse.tests.mx_special.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,478 +0,0 @@ - - -Python: module simpleparse.tests.mx_special - - - - -
     
    - 
    simpleparse.tests.mx_special
    index
    /home/mcfletch/pylive/simpleparse/tests/mx_special.py
    -

    Low-level matching tests for mx.TextTools

    -

    - - - - - -
     
    -Modules
           
    simpleparse.stt.TextTools.Constants
    -simpleparse.stt.TextTools
    -
    simpleparse.stt.TextTools.mxTextTools.mxTextTools
    -pprint
    -
    string
    -time
    -
    types
    -unittest
    -

    - - - - - -
     
    -Classes
           
    -
    TestCase(object) -
    -
    -
    MXSpecialTests -
    -
    -
    -

    - - - - - -
     
    -class MXSpecialTests(TestCase)
        
    Method resolution order:
    -
    MXSpecialTests
    -
    TestCase
    -
    object
    -
    -
    -Methods defined here:
    -
    doBasicTest(self, table, testvalue, expected, startPosition=0)
    - -
    testBMSMove(self)
    Negative, optional string value
    - -
    testEOF1(self)
    Test EOF command

    -Although it's not documented, the original code returned
    -the EOF position as the left and right coords for the match,
    -so we mimic that behaviour now.
    - -
    testEOF3(self)
    Test EOF command when in middle of buffer
    - -
    testFail1(self)
    Test Fail command
    - -
    testFail2(self)
    Test Fail command with ignore fail (Jump)
    - -
    testJumpBeforeTable(self)
    Test Jump to before table (explicit fail)

    -Note: this reports the position attained by the
    -matching child (2) as the "error position", not
    -the position before that child (0).
    - -
    testJumpTarget(self)
    Test JumpTarget command in normal usage
    - -
    testJumpTargetNamed(self)
    Test JumpTarget command with tagobj specified
    - -
    testMove1(self)
    Test Move command
    -XXX Should have tests for after buffer moves
    - -
    testMove2(self)
    Test Move command with negative to middle of buffer
    -XXX should have tests for before buffer

    -Note: this command is non-intuitive for Python users,
    -the negative slicing is 1 beyond what it would be for Python
    -(i.e. -1 in Python is 1 before the end, whereas in this
    -command it is the end)
    - -
    testMove3(self)
    Test Move command
    - -
    testMove4(self)
    Test Move to EOF
    - -
    testNegativeOptString1(self)
    Negative, optional string value with positive match (should return 0 as length of match)
    - -
    testSkip1(self)
    Test Skip command
    - -
    testSkip2(self)
    Test Skip command with negative to before buffer

    -Note: I don't like this, but it's what we should expect
    -from the system, so blah. Would be better IMO to have
    -success (within the buffer) and failure (outside the buffer)
    -but then we need a way to spell (jump, even outside buffer)

    -Should have a test for what to do when we have AppendMatch
    -flag in this case...
    - -
    -Methods inherited from TestCase:
    -
    __call__(self, *args, **kwds)
    - -
    __init__(self, methodName='runTest')
    Create an instance of the class that will use the named test
    -method when executed. Raises a ValueError if the instance does
    -not have a method with the specified name.
    - -
    __repr__(self)
    - -
    __str__(self)
    - -
    assertAlmostEqual = failUnlessAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are unequal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most significant digit).
    - -
    assertAlmostEquals = failUnlessAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are unequal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most significant digit).
    - -
    assertEqual = failUnlessEqual(self, first, second, msg=None)
    Fail if the two objects are unequal as determined by the '=='
    -operator.
    - -
    assertEquals = failUnlessEqual(self, first, second, msg=None)
    Fail if the two objects are unequal as determined by the '=='
    -operator.
    - -
    assertFalse = failIf(self, expr, msg=None)
    Fail the test if the expression is true.
    - -
    assertNotAlmostEqual = failIfAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are equal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most significant digit).
    - -
    assertNotAlmostEquals = failIfAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are equal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most significant digit).
    - -
    assertNotEqual = failIfEqual(self, first, second, msg=None)
    Fail if the two objects are equal as determined by the '=='
    -operator.
    - -
    assertNotEquals = failIfEqual(self, first, second, msg=None)
    Fail if the two objects are equal as determined by the '=='
    -operator.
    - -
    assertRaises = failUnlessRaises(self, excClass, callableObj, *args, **kwargs)
    Fail unless an exception of class excClass is thrown
    -by callableObj when invoked with arguments args and keyword
    -arguments kwargs. If a different type of exception is
    -thrown, it will not be caught, and the test case will be
    -deemed to have suffered an error, exactly as for an
    -unexpected exception.
    - -
    assertTrue = failUnless(self, expr, msg=None)
    Fail the test unless the expression is true.
    - -
    assert_ = failUnless(self, expr, msg=None)
    Fail the test unless the expression is true.
    - -
    countTestCases(self)
    - -
    debug(self)
    Run the test without collecting errors in a TestResult
    - -
    defaultTestResult(self)
    - -
    fail(self, msg=None)
    Fail immediately, with the given message.
    - -
    failIf(self, expr, msg=None)
    Fail the test if the expression is true.
    - -
    failIfAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are equal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most significant digit).
    - -
    failIfEqual(self, first, second, msg=None)
    Fail if the two objects are equal as determined by the '=='
    -operator.
    - -
    failUnless(self, expr, msg=None)
    Fail the test unless the expression is true.
    - -
    failUnlessAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are unequal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most significant digit).
    - -
    failUnlessEqual(self, first, second, msg=None)
    Fail if the two objects are unequal as determined by the '=='
    -operator.
    - -
    failUnlessRaises(self, excClass, callableObj, *args, **kwargs)
    Fail unless an exception of class excClass is thrown
    -by callableObj when invoked with arguments args and keyword
    -arguments kwargs. If a different type of exception is
    -thrown, it will not be caught, and the test case will be
    -deemed to have suffered an error, exactly as for an
    -unexpected exception.
    - -
    id(self)
    - -
    run(self, result=None)
    - -
    setUp(self)
    Hook method for setting up the test fixture before exercising it.
    - -
    shortDescription(self)
    Returns a one-line description of the test, or None if no
    -description has been provided.

    -The default implementation of this method returns the first line of
    -the specified test method's docstring.
    - -
    tearDown(self)
    Hook method for deconstructing the test fixture after testing it.
    - -
    -Data and other attributes inherited from TestCase:
    -
    __dict__ = <dictproxy object>
    dictionary for instance variables (if defined)
    - -
    __weakref__ = <attribute '__weakref__' of 'TestCase' objects>
    list of weak references to the object (if defined)
    - -
    failureException = <class exceptions.AssertionError>
    Assertion failed.
    - -

    - - - - - -
     
    -Functions
           
    BMS = TextSearch(...)
    TextSearch(match[,translate=None,algorithm=default_algorithm])

    -Create a substring search object for the string match;
    -translate is an optional translate-string like the one used
    -in the module re.
    -
    CharSet(...)
    CharSet(definition)

    -Create a character set matching object from the string
    -
    FS = TextSearch(...)
    TextSearch(match[,translate=None,algorithm=default_algorithm])

    -Create a substring search object for the string match;
    -translate is an optional translate-string like the one used
    -in the module re.
    -
    FSType = TextSearch(...)
    TextSearch(match[,translate=None,algorithm=default_algorithm])

    -Create a substring search object for the string match;
    -translate is an optional translate-string like the one used
    -in the module re.
    -
    TagTable(...)
    TagTable(definition[,cachable=1])
    -
    TextSearch(...)
    TextSearch(match[,translate=None,algorithm=default_algorithm])

    -Create a substring search object for the string match;
    -translate is an optional translate-string like the one used
    -in the module re.
    -
    UnicodeTagTable(...)
    TagTable(definition[,cachable=1])
    -
    charsplit(...)
    charsplit(text,char,start=0,stop=len(text))

    -Split text[start:stop] into substrings at char and
    -return the result as list of strings.
    -
    cmp(...)
    cmp(a,b)

    -Compare two valid taglist tuples with respect to their slice
    -position; this is useful for sorting joinlists.
    -
    getSuite()
    -
    hex2str(...)
    hex2str(text)

    -Return text interpreted as two byte HEX values converted
    -to a string.
    -
    isascii(...)
    isascii(text,start=0,stop=len(text))

    -Return 1/0 depending on whether text only contains ASCII
    -characters.
    -
    join(...)
    join(joinlist,sep='',start=0,stop=len(joinlist))

    -Copy snippets from different strings together producing a
    -new string
    -The first argument must be a list of tuples or strings;
    -tuples must be of the form (string,l,r[,...]) and turn out
    -as string[l:r]
    -NOTE: the syntax used for negative slices is different
    -than the Python standard: -1 corresponds to the first
    -character *after* the string, e.g. ('Example',0,-1) gives
    -'Example' and not 'Exampl', like in Python
    -sep is an optional separator string, start and stop
    -define the slice of joinlist that is taken into account.
    -
    joinlist(...)
    joinlist(text,list,start=0,stop=len(text))

    -Takes a list of tuples (replacement,l,r,...) and produces
    -a taglist suitable for join() which creates a copy
    -of text where every slice [l:r] is replaced by the
    -given replacement
    -- the list must be sorted using cmp() as compare function
    -- it may not contain overlapping slices
    -- the slices may not contain negative indices
    -- if the taglist cannot contain overlapping slices, you can
    -  give this function the taglist produced by tag() directly
    -  (sorting is not needed, as the list will already be sorted)
    -- start and stop set the slice to work in, i.e. text[start:stop]
    -
    lower(...)
    lower(text)

    -Return text converted to lower case.
    -
    prefix(...)
    prefix(text,prefixes,start=0,stop=len(text)[,translate])

    -Looks at text[start:stop] and returns the first matching
    -prefix out of the tuple of strings given in prefixes.
    -If no prefix is found to be matching, None is returned.
    -The optional 256 char translate string is used to translate
    -the text prior to comparing it with the given prefixes.
    -
    set(...)
    set(string,logic=1)

    -Returns a character set for string: a bit encoded version
    -of the characters occurring in string.
    -- logic can be set to 0 if all characters *not* in string
    -  should go into the set
    -
    setfind(...)
    setfind(text,set,start=0,stop=len(text))

    -Find the first occurrence of any character from set in
    -text[start:stop]
    - set must be a string obtained with set()
    -DEPRECATED: use CharSet().search() instead.
    -
    setsplit(...)
    setsplit(text,set,start=0,stop=len(text))

    -Split text[start:stop] into substrings using set,
    -omitting the splitting parts and empty substrings.
    -set must be a string obtained from set()
    -DEPRECATED: use CharSet().split() instead.
    -
    setsplitx(...)
    setsplitx(text,set,start=0,stop=len(text))

    -Split text[start:stop] into substrings using set, so
    -that every second entry consists only of characters in set.
    -set must be a string obtained with set()
    -DEPRECATED: use CharSet().splitx() instead.
    -
    setstrip(...)
    setstrip(text,set,start=0,stop=len(text),mode=0)

    -Strip all characters in text[start:stop] appearing in set.
    -mode indicates where to strip (<0: left; =0: left and right;
    ->0: right). set must be a string obtained with set()
    -DEPRECATED: use CharSet().strip() instead.
    -
    splitat(...)
    splitat(text,char,nth=1,start=0,stop=len(text))

    -Split text[start:stop] into two substrings at the nth
    -occurrence of char and return the result as a 2-tuple. If the
    -character is not found, the second string is empty. nth may
    -be negative: the search is then done from the right and the
    -first string is empty in case the character is not found.
    -
    str2hex(...)
    str2hex(text)

    -Return text converted to a string consisting of two byte
    -HEX values.
    -
    suffix(...)
    suffix(text,suffixes,start=0,stop=len(text)[,translate])

    -Looks at text[start:stop] and returns the first matching
    -suffix out of the tuple of strings given in suffixes.
    -If no suffix is found to be matching, None is returned.
    -The optional 256 char translate string is used to translate
    -the text prior to comparing it with the given suffixes.
    -
    tag(...)
    tag(text,tagtable,sliceleft=0,sliceright=len(text),taglist=[],context=None) 
    -Produce a tag list for a string, given a tag-table
    -- returns a tuple (success, taglist, nextindex)
    -- if taglist == None, then no taglist is created
    -
    upper(...)
    upper(text)

    -Return text converted to upper case.
    -

    - - - - - -
     
    -Data
           A2Z = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    -A2Z_charset = <Character Set object for 'A-Z'>
    -A2Z_set = '\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -AllIn = 11
    -AllInCharSet = 41
    -AllInSet = 31
    -AllNotIn = 12
    -AnyInt = <Any Integer>
    -AppendMatch = 2048
    -AppendTagobj = 1024
    -AppendToTagobj = 512
    -BOYERMOORE = 0
    -Break = 0
    -Call = 201
    -CallArg = 202
    -CallTag = 256
    -EOF = 101
    -FASTSEARCH = 1
    -Fail = 100
    -Here = 1
    -Is = 13
    -IsIn = 14
    -IsInCharSet = 42
    -IsInSet = 32
    -IsNot = 15
    -IsNotIn = 15
    -Jump = 100
    -JumpTarget = 104
    -LookAhead = 4096
    -Loop = 205
    -LoopControl = 206
    -MatchFail = -1000000
    -MatchOk = 1000000
    -Move = 103
    -NoWord = 211
    -NullResult = <Null Children>
    -Reset = -1
    -Skip = 102
    -SubTable = 207
    -SubTableInList = 208
    -TRIVIAL = 2
    -Table = 203
    -TableInList = 204
    -ThisTable = 999
    -To = 0
    -ToBOF = 0
    -ToEOF = -1
    -Umlaute = '\xc4\xd6\xdc'
    -Umlaute_charset = <Character Set object for '\xc4\xd6\xdc'>
    -Word = 21
    -WordEnd = 23
    -WordStart = 22
    -a2z = 'abcdefghijklmnopqrstuvwxyz'
    -a2z_charset = <Character Set object for 'a-z'>
    -a2z_set = '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -alpha = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
    -alpha_charset = <Character Set object for 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'>
    -alpha_set = '\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -alphanumeric = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'
    -alphanumeric_charset = <Character Set object for 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'>
    -alphanumeric_set = '\x00\x00\x00\x00\x00\x00\xff\x03\xfe\xff\xff\x07\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -any = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./...\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'
    -any_charset = <Character Set object for '\x00-\xff'>
    -any_set = '\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff'
    -formfeed = '\x0c'
    -formfeed_charset = <Character Set object for '\x0c'>
    -german_alpha = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\xe4\xf6\xfc\xdf\xc4\xd6\xdc'
    -german_alpha_charset = <Character Set object for 'ABCDEFGHIJKLMNOPQRSTU...hijklmnopqrstuvwxyz\xe4\xf6\xfc\xdf\xc4\xd6\xdc'>
    -german_alpha_set = '\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00@\x90\x10\x00@\x10'
    -id2cmd = {-1000000: 'MatchFail', -1: 'ToEOF', 0: 'Fail/Jump', 1: 'Here', 11: 'AllIn', 12: 'AllNotIn', 13: 'Is', 14: 'IsIn', 15: 'IsNotIn', 21: 'Word', ...}
    -mxVersion = ('2', '1', '0')
    -newline = '\r\n'
    -newline_charset = <Character Set object for '\r\n'>
    -newline_set = '\x00$\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -nonwhitespace_charset = <Character Set object for '^ \t\x0b\r\n\x0c'>
    -nonwhitespace_set = '\xff\xc1\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff'
    -number = '0123456789'
    -number_charset = <Character Set object for '0-9'>
    -number_set = '\x00\x00\x00\x00\x00\x00\xff\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -sFindWord = 213
    -sWordEnd = 212
    -sWordStart = 211
    -tagtable_cache = {(46912536021760, 0): <String Tag Table object>, (46912540134840, 0): <String Tag Table object>, (46912541410080, 0): <String Tag Table object>, (46912541454848, 0): <String Tag Table object>, (46912541455136, 0): <String Tag Table object>, (46912541455208, 0): <String Tag Table object>, (46912541489264, 0): <String Tag Table object>, (46912541566016, 0): <String Tag Table object>, (46912543903688, 0): <String Tag Table object>, (46912543908136, 0): <String Tag Table object>, ...}
    -to_lower = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./...\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'
    -to_upper = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./...\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'
    -umlaute = '\xe4\xf6\xfc\xdf'
    -umlaute_charset = <Character Set object for '\xe4\xf6\xfc\xdf'>
    -white = ' \t\x0b'
    -white_charset = <Character Set object for ' \t\x0b'>
    -white_set = '\x00\x02\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -whitespace = ' \t\x0b\r\n\x0c'
    -whitespace_charset = <Character Set object for ' \t\x0b\r\n\x0c'>
    -whitespace_set = '\x00&\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    - \ No newline at end of file diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.tests.mx_test.html simpleparse-2.2.0/doc/pydoc/simpleparse.tests.mx_test.html --- simpleparse-2.1.0a1/doc/pydoc/simpleparse.tests.mx_test.html 2006-02-19 01:05:16.000000000 +0000 +++ simpleparse-2.2.0/doc/pydoc/simpleparse.tests.mx_test.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,35 +0,0 @@ - - -Python: module simpleparse.tests.mx_test - - - - -
     
    - 
    simpleparse.tests.mx_test
    index
    /home/mcfletch/pylive/simpleparse/tests/mx_test.py
    -

    -

    - - - - - -
     
    -Modules
           
    simpleparse.tests.mx_flag
    -simpleparse.tests.mx_high
    -
    simpleparse.tests.mx_low
    -simpleparse.tests.mx_recursive
    -
    simpleparse.tests.mx_special
    -unittest
    -

    - - - - - -
     
    -Functions
           
    getSuite()
    -
    - \ No newline at end of file diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.tests.test_backup_on_subtable_failure.html simpleparse-2.2.0/doc/pydoc/simpleparse.tests.test_backup_on_subtable_failure.html --- simpleparse-2.1.0a1/doc/pydoc/simpleparse.tests.test_backup_on_subtable_failure.html 2006-02-19 01:05:16.000000000 +0000 +++ simpleparse-2.2.0/doc/pydoc/simpleparse.tests.test_backup_on_subtable_failure.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,34 +0,0 @@ - - -Python: module simpleparse.tests.test_backup_on_subtable_failure - - - - -
     
    - 
    simpleparse.tests.test_backup_on_subtable_failure
    index
    /home/mcfletch/pylive/simpleparse/tests/test_backup_on_subtable_failure.py
    -

    -

    - - - - - -
     
    -Modules
           
    simpleparse.stt.TextTools.TextTools
    -
    pprint
    -

    - - - - - -
     
    -Data
           declaration = "testparser := (a,b)*\na := 'a'\nb := 'b'\n"
    -expectedResult = (1, [('a', 0, 1, []), ('b', 1, 2, [])], 2)
    -parser = ((None, 207, (('a', 21, 'a'), ('b', 21, 'b')), 2, 1), (None, 101, 1, -1, 1))
    -result = (1, [('a', 0, 1, None), ('b', 1, 2, None)], 2)
    -testdata = 'aba'
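The Data block above already contains everything needed to re-run the deleted test by hand. The sketch below is illustrative only: it assumes tag() is importable from the simpleparse.stt.TextTools.TextTools module named in the Modules block, feeds the documented parser table the documented test data, and the commented result is the one recorded above.

    from simpleparse.stt.TextTools import TextTools as TT

    # 207 = SubTable, 21 = Word, 101 = EOF (see the Data listings on earlier pages)
    parser = ((None, 207, (('a', 21, 'a'), ('b', 21, 'b')), 2, 1),
              (None, 101, 1, -1, 1))
    testdata = 'aba'

    print(TT.tag(testdata, parser))
    # recorded result: (1, [('a', 0, 1, None), ('b', 1, 2, None)], 2)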
    - \ No newline at end of file diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.tests.test_common_chartypes.html simpleparse-2.2.0/doc/pydoc/simpleparse.tests.test_common_chartypes.html --- simpleparse-2.1.0a1/doc/pydoc/simpleparse.tests.test_common_chartypes.html 2006-02-19 01:05:16.000000000 +0000 +++ simpleparse-2.2.0/doc/pydoc/simpleparse.tests.test_common_chartypes.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,206 +0,0 @@ - - -Python: module simpleparse.tests.test_common_chartypes - - - - -
     
    - 
    simpleparse.tests.test_common_chartypes
    index
    /home/mcfletch/pylive/simpleparse/tests/test_common_chartypes.py
    -

    -

    - - - - - -
     
    -Modules
           
    simpleparse.common.chartypes
    -simpleparse.dispatchprocessor
    -
    string
    -simpleparse.common.timezone_names
    -
    unittest
    -

    - - - - - -
     
    -Classes
           
    -
    TestCase(object) -
    -
    -
    CommonTests -
    -
    -
    -

    - - - - - -
     
    -class CommonTests(TestCase)
        
    Method resolution order:
    -
    CommonTests
    -
    TestCase
    -
    object
    -
    -
    -Methods defined here:
    -
    doBasicTest(self, definition, parserName, testValue, expected)
    - -
    testBasic(self)
    - -
    testEOF(self)
    - -
    testEOFFail(self)
    - -
    testTZ(self)
    - -
    -Methods inherited from TestCase:
    -
    __call__(self, *args, **kwds)
    - -
    __init__(self, methodName='runTest')
    Create an instance of the class that will use the named test
    -method when executed. Raises a ValueError if the instance does
    -not have a method with the specified name.
    - -
    __repr__(self)
    - -
    __str__(self)
    - -
    assertAlmostEqual = failUnlessAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are unequal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most significant digit).
    - -
    assertAlmostEquals = failUnlessAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are unequal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most significant digit).
    - -
    assertEqual = failUnlessEqual(self, first, second, msg=None)
    Fail if the two objects are unequal as determined by the '=='
    -operator.
    - -
    assertEquals = failUnlessEqual(self, first, second, msg=None)
    Fail if the two objects are unequal as determined by the '=='
    -operator.
    - -
    assertFalse = failIf(self, expr, msg=None)
    Fail the test if the expression is true.
    - -
    assertNotAlmostEqual = failIfAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are equal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most significant digit).
    - -
    assertNotAlmostEquals = failIfAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are equal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most significant digit).
    - -
    assertNotEqual = failIfEqual(self, first, second, msg=None)
    Fail if the two objects are equal as determined by the '=='
    -operator.
    - -
    assertNotEquals = failIfEqual(self, first, second, msg=None)
    Fail if the two objects are equal as determined by the '=='
    -operator.
    - -
    assertRaises = failUnlessRaises(self, excClass, callableObj, *args, **kwargs)
    Fail unless an exception of class excClass is thrown
    -by callableObj when invoked with arguments args and keyword
    -arguments kwargs. If a different type of exception is
    -thrown, it will not be caught, and the test case will be
    -deemed to have suffered an error, exactly as for an
    -unexpected exception.
    - -
    assertTrue = failUnless(self, expr, msg=None)
    Fail the test unless the expression is true.
    - -
    assert_ = failUnless(self, expr, msg=None)
    Fail the test unless the expression is true.
    - -
    countTestCases(self)
    - -
    debug(self)
    Run the test without collecting errors in a TestResult
    - -
    defaultTestResult(self)
    - -
    fail(self, msg=None)
    Fail immediately, with the given message.
    - -
    failIf(self, expr, msg=None)
    Fail the test if the expression is true.
    - -
    failIfAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are equal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most significant digit).
    - -
    failIfEqual(self, first, second, msg=None)
    Fail if the two objects are equal as determined by the '=='
    -operator.
    - -
    failUnless(self, expr, msg=None)
    Fail the test unless the expression is true.
    - -
    failUnlessAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are unequal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most significant digit).
    - -
    failUnlessEqual(self, first, second, msg=None)
    Fail if the two objects are unequal as determined by the '=='
    -operator.
    - -
    failUnlessRaises(self, excClass, callableObj, *args, **kwargs)
    Fail unless an exception of class excClass is thrown
    -by callableObj when invoked with arguments args and keyword
    -arguments kwargs. If a different type of exception is
    -thrown, it will not be caught, and the test case will be
    -deemed to have suffered an error, exactly as for an
    -unexpected exception.
    - -
    id(self)
    - -
    run(self, result=None)
    - -
    setUp(self)
    Hook method for setting up the test fixture before exercising it.
    - -
    shortDescription(self)
    Returns a one-line description of the test, or None if no
    -description has been provided.

    -The default implementation of this method returns the first line of
    -the specified test method's docstring.
    - -
    tearDown(self)
    Hook method for deconstructing the test fixture after testing it.
    - -
    -Data and other attributes inherited from TestCase:
    -
    __dict__ = <dictproxy object>
    dictionary for instance variables (if defined)
    - -
    __weakref__ = <attribute '__weakref__' of 'TestCase' objects>
    list of weak references to the object (if defined)
    - -
    failureException = <class exceptions.AssertionError>
    Assertion failed.
    - -

    - - - - - -
     
    -Functions
           
    getSuite()
    -

    - - - - - -
     
    -Data
           fulltrans = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./...\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'
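    For reference, the inherited TestCase assertions documented above can be exercised with a minimal, self-contained sketch like the following (standard Python 2.x unittest; the class and test names are illustrative, not taken from this package):

        import unittest

        class AssertionDemo(unittest.TestCase):
            """Illustrative only: shows the assertion semantics documented above."""
            def test_almost_equal(self):
                # Passes: 0.1 + 0.2 differs from 0.3 only beyond the 7th decimal place.
                self.assertAlmostEqual(0.1 + 0.2, 0.3, places=7)
            def test_raises(self):
                # Passes: int('x') raises ValueError, the expected exception class.
                self.assertRaises(ValueError, int, 'x')

        if __name__ == '__main__':
            unittest.main()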
    -
\ No newline at end of file
diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.tests.test_common_comments.html simpleparse-2.2.0/doc/pydoc/simpleparse.tests.test_common_comments.html
--- simpleparse-2.1.0a1/doc/pydoc/simpleparse.tests.test_common_comments.html 2006-02-19 01:05:16.000000000 +0000
+++ simpleparse-2.2.0/doc/pydoc/simpleparse.tests.test_common_comments.html 1970-01-01 00:00:00.000000000 +0000
@@ -1,197 +0,0 @@
    -Python: module simpleparse.tests.test_common_comments
     
    - 
    simpleparse.tests.test_common_comments
    index
    /home/mcfletch/pylive/simpleparse/tests/test_common_comments.py
    -

    Test the various common library comment productions

    -

    - - - - - -
     
    -Modules
           
    simpleparse.common.comments
    -
    simpleparse.dispatchprocessor
    -
    string
    -
    unittest
    -

    - - - - - -
     
    -Classes
           
    -
    TestCase(object) -
    -
    -
    CommonTests -
    -
    -
    -

    - - - - - -
     
    -class CommonTests(TestCase)
        
    Method resolution order:
    -
    CommonTests
    -
    TestCase
    -
    object
    -
    -
    -Methods defined here:
    -
    testBasic(self)
    - -
    -Methods inherited from TestCase:
    -
    __call__(self, *args, **kwds)
    - -
    __init__(self, methodName='runTest')
    Create an instance of the class that will use the named test
    -method when executed. Raises a ValueError if the instance does
    -not have a method with the specified name.
    - -
    __repr__(self)
    - -
    __str__(self)
    - -
    assertAlmostEqual = failUnlessAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are unequal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most signficant digit).
    - -
    assertAlmostEquals = failUnlessAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are unequal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most signficant digit).
    - -
    assertEqual = failUnlessEqual(self, first, second, msg=None)
    Fail if the two objects are unequal as determined by the '=='
    -operator.
    - -
    assertEquals = failUnlessEqual(self, first, second, msg=None)
    Fail if the two objects are unequal as determined by the '=='
    -operator.
    - -
    assertFalse = failIf(self, expr, msg=None)
    Fail the test if the expression is true.
    - -
    assertNotAlmostEqual = failIfAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are equal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most signficant digit).
    - -
    assertNotAlmostEquals = failIfAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are equal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most signficant digit).
    - -
    assertNotEqual = failIfEqual(self, first, second, msg=None)
    Fail if the two objects are equal as determined by the '=='
    -operator.
    - -
    assertNotEquals = failIfEqual(self, first, second, msg=None)
    Fail if the two objects are equal as determined by the '=='
    -operator.
    - -
    assertRaises = failUnlessRaises(self, excClass, callableObj, *args, **kwargs)
    Fail unless an exception of class excClass is thrown
    -by callableObj when invoked with arguments args and keyword
    -arguments kwargs. If a different type of exception is
    -thrown, it will not be caught, and the test case will be
    -deemed to have suffered an error, exactly as for an
    -unexpected exception.
    - -
    assertTrue = failUnless(self, expr, msg=None)
    Fail the test unless the expression is true.
    - -
    assert_ = failUnless(self, expr, msg=None)
    Fail the test unless the expression is true.
    - -
    countTestCases(self)
    - -
    debug(self)
    Run the test without collecting errors in a TestResult
    - -
    defaultTestResult(self)
    - -
    fail(self, msg=None)
    Fail immediately, with the given message.
    - -
    failIf(self, expr, msg=None)
    Fail the test if the expression is true.
    - -
    failIfAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are equal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most signficant digit).
    - -
    failIfEqual(self, first, second, msg=None)
    Fail if the two objects are equal as determined by the '=='
    -operator.
    - -
    failUnless(self, expr, msg=None)
    Fail the test unless the expression is true.
    - -
    failUnlessAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are unequal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most signficant digit).
    - -
    failUnlessEqual(self, first, second, msg=None)
    Fail if the two objects are unequal as determined by the '=='
    -operator.
    - -
    failUnlessRaises(self, excClass, callableObj, *args, **kwargs)
    Fail unless an exception of class excClass is thrown
    -by callableObj when invoked with arguments args and keyword
    -arguments kwargs. If a different type of exception is
    -thrown, it will not be caught, and the test case will be
    -deemed to have suffered an error, exactly as for an
    -unexpected exception.
    - -
    id(self)
    - -
    run(self, result=None)
    - -
    setUp(self)
    Hook method for setting up the test fixture before exercising it.
    - -
    shortDescription(self)
    Returns a one-line description of the test, or None if no
    -description has been provided.

    -The default implementation of this method returns the first line of
    -the specified test method's docstring.
    - -
    tearDown(self)
    Hook method for deconstructing the test fixture after testing it.
    - -
    -Data and other attributes inherited from TestCase:
    -
    __dict__ = <dictproxy object>
    dictionary for instance variables (if defined)
    - -
    __weakref__ = <attribute '__weakref__' of 'TestCase' objects>
    list of weak references to the object (if defined)
    - -
    failureException = <class exceptions.AssertionError>
    Assertion failed.
    - -

    - - - - - -
     
    -Functions
           
    getSuite()
    -

    - - - - - -
     
    -Data
           parseTests = [('c_comment', ['/* this */', '/* this \n\n*/'], ['// this', '# this', '# this\n', '# this\r\n']), ('c_nest_comment', ['/* this */', '/* this \n\n*/', '/* /* this */ */', '/* /* this \n*/ */'], ['// this', '# this', '; this']), ('hash_comment', ['# this', '# this\n', '# this\r\n'], ['// this', '/* this */', '/* /* this */ */']), ('semicolon_comment', ['; this', '; this\n', '; this\r\n'], ['# this', '// this', '/* this */', '/* /* this */ */']), ('slashslash_comment', ['// this', '// this\n', '// this\r\n'], ['# this', '/ this', '/* this */', '/* /* this */ */'])]
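    The production names in parseTests above (c_comment, c_nest_comment, hash_comment, semicolon_comment, slashslash_comment) are registered by importing simpleparse.common.comments, after which they can be referenced directly in grammar declarations. A minimal sketch of exercising one of them (the one-line declaration and sample text are illustrative, not taken from the test module):

        from simpleparse.parser import Parser
        import simpleparse.common.comments    # registers the shared comment productions

        decl = '''root := hash_comment'''     # illustrative grammar using a shared production
        parser = Parser(decl, 'root')

        success, children, next_char = parser.parse('# this\n')
        print success, children, next_char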
    -
\ No newline at end of file
diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.tests.test_common_iso_date.html simpleparse-2.2.0/doc/pydoc/simpleparse.tests.test_common_iso_date.html
--- simpleparse-2.1.0a1/doc/pydoc/simpleparse.tests.test_common_iso_date.html 2006-02-19 01:05:16.000000000 +0000
+++ simpleparse-2.2.0/doc/pydoc/simpleparse.tests.test_common_iso_date.html 1970-01-01 00:00:00.000000000 +0000
@@ -1,206 +0,0 @@
    -Python: module simpleparse.tests.test_common_iso_date
     
    - 
    simpleparse.tests.test_common_iso_date
    index
    /home/mcfletch/pylive/simpleparse/tests/test_common_iso_date.py
    -

    -

    - - - - - -
     
    -Modules
           
    mx.DateTime
    -simpleparse.common.iso_date
    -
    simpleparse.common.iso_date_loose
    -string
    -
    time
    -unittest
    -

    - - - - - -
     
    -Classes
           
    -
    TestCase(object) -
    -
    -
    CommonTests -
    -
    -
    -

    - - - - - -
     
    -class CommonTests(TestCase)
        
    Method resolution order:
    -
    CommonTests
    -
    TestCase
    -
    object
    -
    -
    -Methods defined here:
    -
    testISODate(self)
    Test the parsing of ISO date and time formats
    - -
    testISODateLoose(self)
    Test the parsing of ISO date and time formats
    - -
    testProductions2(self)
    - -
    testProductionsStrict(self)
    - -
    -Methods inherited from TestCase:
    -
    __call__(self, *args, **kwds)
    - -
    __init__(self, methodName='runTest')
    Create an instance of the class that will use the named test
    -method when executed. Raises a ValueError if the instance does
    -not have a method with the specified name.
    - -
    __repr__(self)
    - -
    __str__(self)
    - -
    assertAlmostEqual = failUnlessAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are unequal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most signficant digit).
    - -
    assertAlmostEquals = failUnlessAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are unequal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most signficant digit).
    - -
    assertEqual = failUnlessEqual(self, first, second, msg=None)
    Fail if the two objects are unequal as determined by the '=='
    -operator.
    - -
    assertEquals = failUnlessEqual(self, first, second, msg=None)
    Fail if the two objects are unequal as determined by the '=='
    -operator.
    - -
    assertFalse = failIf(self, expr, msg=None)
    Fail the test if the expression is true.
    - -
    assertNotAlmostEqual = failIfAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are equal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most signficant digit).
    - -
    assertNotAlmostEquals = failIfAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are equal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most signficant digit).
    - -
    assertNotEqual = failIfEqual(self, first, second, msg=None)
    Fail if the two objects are equal as determined by the '=='
    -operator.
    - -
    assertNotEquals = failIfEqual(self, first, second, msg=None)
    Fail if the two objects are equal as determined by the '=='
    -operator.
    - -
    assertRaises = failUnlessRaises(self, excClass, callableObj, *args, **kwargs)
    Fail unless an exception of class excClass is thrown
    -by callableObj when invoked with arguments args and keyword
    -arguments kwargs. If a different type of exception is
    -thrown, it will not be caught, and the test case will be
    -deemed to have suffered an error, exactly as for an
    -unexpected exception.
    - -
    assertTrue = failUnless(self, expr, msg=None)
    Fail the test unless the expression is true.
    - -
    assert_ = failUnless(self, expr, msg=None)
    Fail the test unless the expression is true.
    - -
    countTestCases(self)
    - -
    debug(self)
    Run the test without collecting errors in a TestResult
    - -
    defaultTestResult(self)
    - -
    fail(self, msg=None)
    Fail immediately, with the given message.
    - -
    failIf(self, expr, msg=None)
    Fail the test if the expression is true.
    - -
    failIfAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are equal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most signficant digit).
    - -
    failIfEqual(self, first, second, msg=None)
    Fail if the two objects are equal as determined by the '=='
    -operator.
    - -
    failUnless(self, expr, msg=None)
    Fail the test unless the expression is true.
    - -
    failUnlessAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are unequal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most signficant digit).
    - -
    failUnlessEqual(self, first, second, msg=None)
    Fail if the two objects are unequal as determined by the '=='
    -operator.
    - -
    failUnlessRaises(self, excClass, callableObj, *args, **kwargs)
    Fail unless an exception of class excClass is thrown
    -by callableObj when invoked with arguments args and keyword
    -arguments kwargs. If a different type of exception is
    -thrown, it will not be caught, and the test case will be
    -deemed to have suffered an error, exactly as for an
    -unexpected exception.
    - -
    id(self)
    - -
    run(self, result=None)
    - -
    setUp(self)
    Hook method for setting up the test fixture before exercising it.
    - -
    shortDescription(self)
    Returns a one-line description of the test, or None if no
    -description has been provided.

    -The default implementation of this method returns the first line of
    -the specified test method's docstring.
    - -
    tearDown(self)
    Hook method for deconstructing the test fixture after testing it.
    - -
    -Data and other attributes inherited from TestCase:
    -
    __dict__ = <dictproxy object>
    dictionary for instance variables (if defined)
    - -
    __weakref__ = <attribute '__weakref__' of 'TestCase' objects>
    list of weak references to the object (if defined)
    - -
    failureException = <class exceptions.AssertionError>
    Assertion failed.
    - -

    - - - - - -
     
    -Functions
           
    getSuite()
    -

    - - - - - -
     
    -Data
           fulltrans = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./...\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'
    -tzOffset = <DateTimeDelta object for '05:00:00.00' at 2aaaad8d87a0>
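    The module under test layers ISO 8601 date and time productions on top of mx.DateTime (hence the tzOffset delta shown above). As a rough sketch only, the production name ISO_date_time used below is an assumption for illustration; the usage pattern mirrors the other common productions:

        from simpleparse.parser import Parser
        import simpleparse.common.iso_date    # registers the ISO date/time productions

        decl = '''root := ISO_date_time'''    # production name assumed for this sketch
        parser = Parser(decl, 'root')
        print parser.parse('2006-02-19T01:05:16')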
    -
\ No newline at end of file
diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.tests.test_common_numbers.html simpleparse-2.2.0/doc/pydoc/simpleparse.tests.test_common_numbers.html
--- simpleparse-2.1.0a1/doc/pydoc/simpleparse.tests.test_common_numbers.html 2006-02-19 01:05:16.000000000 +0000
+++ simpleparse-2.2.0/doc/pydoc/simpleparse.tests.test_common_numbers.html 1970-01-01 00:00:00.000000000 +0000
@@ -1,197 +0,0 @@
    -Python: module simpleparse.tests.test_common_numbers
     
    - 
    simpleparse.tests.test_common_numbers
    index
    /home/mcfletch/pylive/simpleparse/tests/test_common_numbers.py
    -

    -

    - - - - - -
     
    -Modules
           
    simpleparse.dispatchprocessor
    -
    simpleparse.common.numbers
    -
    string
    -
    unittest
    -

    - - - - - -
     
    -Classes
           
    -
    TestCase(object) -
    -
    -
    CommonTests -
    -
    -
    -

    - - - - - -
     
    -class CommonTests(TestCase)
        
    Method resolution order:
    -
    CommonTests
    -
    TestCase
    -
    object
    -
    -
    -Methods defined here:
    -
    testBasic(self)
    - -
    -Methods inherited from TestCase:
    -
    __call__(self, *args, **kwds)
    - -
    __init__(self, methodName='runTest')
    Create an instance of the class that will use the named test
    -method when executed. Raises a ValueError if the instance does
    -not have a method with the specified name.
    - -
    __repr__(self)
    - -
    __str__(self)
    - -
    assertAlmostEqual = failUnlessAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are unequal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most signficant digit).
    - -
    assertAlmostEquals = failUnlessAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are unequal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most signficant digit).
    - -
    assertEqual = failUnlessEqual(self, first, second, msg=None)
    Fail if the two objects are unequal as determined by the '=='
    -operator.
    - -
    assertEquals = failUnlessEqual(self, first, second, msg=None)
    Fail if the two objects are unequal as determined by the '=='
    -operator.
    - -
    assertFalse = failIf(self, expr, msg=None)
    Fail the test if the expression is true.
    - -
    assertNotAlmostEqual = failIfAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are equal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most signficant digit).
    - -
    assertNotAlmostEquals = failIfAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are equal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most signficant digit).
    - -
    assertNotEqual = failIfEqual(self, first, second, msg=None)
    Fail if the two objects are equal as determined by the '=='
    -operator.
    - -
    assertNotEquals = failIfEqual(self, first, second, msg=None)
    Fail if the two objects are equal as determined by the '=='
    -operator.
    - -
    assertRaises = failUnlessRaises(self, excClass, callableObj, *args, **kwargs)
    Fail unless an exception of class excClass is thrown
    -by callableObj when invoked with arguments args and keyword
    -arguments kwargs. If a different type of exception is
    -thrown, it will not be caught, and the test case will be
    -deemed to have suffered an error, exactly as for an
    -unexpected exception.
    - -
    assertTrue = failUnless(self, expr, msg=None)
    Fail the test unless the expression is true.
    - -
    assert_ = failUnless(self, expr, msg=None)
    Fail the test unless the expression is true.
    - -
    countTestCases(self)
    - -
    debug(self)
    Run the test without collecting errors in a TestResult
    - -
    defaultTestResult(self)
    - -
    fail(self, msg=None)
    Fail immediately, with the given message.
    - -
    failIf(self, expr, msg=None)
    Fail the test if the expression is true.
    - -
    failIfAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are equal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most signficant digit).
    - -
    failIfEqual(self, first, second, msg=None)
    Fail if the two objects are equal as determined by the '=='
    -operator.
    - -
    failUnless(self, expr, msg=None)
    Fail the test unless the expression is true.
    - -
    failUnlessAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are unequal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most signficant digit).
    - -
    failUnlessEqual(self, first, second, msg=None)
    Fail if the two objects are unequal as determined by the '=='
    -operator.
    - -
    failUnlessRaises(self, excClass, callableObj, *args, **kwargs)
    Fail unless an exception of class excClass is thrown
    -by callableObj when invoked with arguments args and keyword
    -arguments kwargs. If a different type of exception is
    -thrown, it will not be caught, and the test case will be
    -deemed to have suffered an error, exactly as for an
    -unexpected exception.
    - -
    id(self)
    - -
    run(self, result=None)
    - -
    setUp(self)
    Hook method for setting up the test fixture before exercising it.
    - -
    shortDescription(self)
    Returns a one-line description of the test, or None if no
    -description has been provided.

    -The default implementation of this method returns the first line of
    -the specified test method's docstring.
    - -
    tearDown(self)
    Hook method for deconstructing the test fixture after testing it.
    - -
    -Data and other attributes inherited from TestCase:
    -
    __dict__ = <dictproxy object>
    dictionary for instance variables (if defined)
    - -
    __weakref__ = <attribute '__weakref__' of 'TestCase' objects>
    list of weak references to the object (if defined)
    - -
    failureException = <class exceptions.AssertionError>
    Assertion failed.
    - -

    - - - - - -
     
    -Functions
           
    getSuite()
    -

    - - - - - -
     
    -Data
           _data = [('int_unsigned', <class simpleparse.common.numbers.IntInterpreter>, [('0 ', 1, 0), ('1 ', 1, 1), ('23 ', 2, 23), ('0x ', 1, 0), ('0. ', 1, 0)], ['.0', 'a']), ('int', <class simpleparse.common.numbers.IntInterpreter>, [('0 ', 1, 0), ('1 ', 1, 1), ('23 ', 2, 23), ('0x ', 1, 0), ('0. ', 1, 0), ('+0 ', 2, 0), ('+1 ', 2, 1), ('+23 ', 3, 23), ('+0x ', 2, 0), ('+0. ', 2, 0), ('-0 ', 2, 0), ('-1 ', 2, -1), ('-23 ', 3, -23), ('-0x ', 2, 0), ('-0. ', 2, 0)], ['.0', 'a', '+.0', '+a', '-.0', '-a']), ('hex', <class simpleparse.common.numbers.HexInterpreter>, [('0x0 ', 3, 0), ('0x1 ', 3, 1), ('0x23 ', 4, 35), ('0x0x ', 3, 0), ('0x0. ', 3, 0), ('+0x0 ', 4, 0), ('+0x1 ', 4, 1), ('+0x23 ', 5, 35), ('+0x0x ', 4, 0), ('+0x0. ', 4, 0), ('-0x0 ', 4, 0), ('-0x1 ', 4, -1), ('-0x23 ', 5, -35), ('-0x0x ', 4, 0), ('-0x0. ', 4, 0), ('0xa ', 3, 10), ('0xaaaaaaaaaaaaaaaaa ', 19, 196765270119568550570L), ('0xA ', 3, 10), ('0xAAAAAAAAAAAAAAAAA ', 19, 196765270119568550570L)], ['.0', 'a', '+.0', '+a', '-.0', '-a', '0x ', '0xg', '0x']), ('binary_number', <class simpleparse.common.numbers.BinaryInterpreter>, [('0b0 ', 2, 0), ('1b0 ', 2, 1), ('10b0 ', 3, 2), ('10000000000b0 ', 12, 1024), ('0B0 ', 2, 0), ('1B0 ', 2, 1), ('10B0 ', 3, 2), ('10000000000B0 ', 12, 1024)], ['.0', 'a', '+.0', '+a', '-.0', '-a', '0x ', '0xg', '0x']), ('float', <class simpleparse.common.numbers.FloatInterpreter>, [('0. ', 2, 0), ('1. ', 2, 1), ('23. ', 3, 23), ('.0 ', 2, 0), ('.1 ', 2, 0.10000000000000001), ('.23 ', 3, 0.23000000000000001), ('0.0x ', 3, 0), ('1.1x ', 3, 1.1000000000000001), ('2000000.22222222x ', 16, 2000000.2222222199), ('1.1e20 ', 6, 1.1e+20), ('1.1e-20 ', 7, 1.1e-20), ('-1.1e20 ', 7, -1.1e+20)], ['0x.0', '23', '-23', '-43*2a', '+23', '-a']), ('float_floatexp', <class simpleparse.common.numbers.FloatFloatExpInterpreter>, [('0. ', 2, 0), ('1. ', 2, 1), ('23. ', 3, 23), ('.0 ', 2, 0), ('.1 ', 2, 0.10000000000000001), ('.23 ', 3, 0.23000000000000001), ('0.0x ', 3, 0), ('1.1x ', 3, 1.1000000000000001), ('2000000.22222222x ', 16, 2000000.2222222199), ('1.1e20 ', 6, 1.1000000000000002e+20), ('1.1e-20 ', 7, 1.1e-20), ('-1.1e20 ', 7, -1.1000000000000002e+20), ('1.1e20.34 ', 9, 2.4065377863445073e+20), ('1.1e-.34 ', 8, 0.50279700857636256)], ['0x.0', '23', '-23', '-43*2a', '+23', '-a'])]
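    Each tuple in _data above pairs a production name from simpleparse.common.numbers (int_unsigned, int, hex, binary_number, float, float_floatexp) with the interpreter class expected to convert the matched text, followed by lists of accepted and rejected samples. A minimal sketch of driving one of those productions (the one-line grammar is illustrative; '0x23 ' is one of the accepted samples, expected to yield 35 once interpreted):

        from simpleparse.parser import Parser
        import simpleparse.common.numbers     # registers the shared number productions

        decl = '''root := hex'''              # illustrative grammar using a shared production
        parser = Parser(decl, 'root')

        success, children, next_char = parser.parse('0x23 ')
        print success, children, next_char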
    -
\ No newline at end of file
diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.tests.test_common_strings.html simpleparse-2.2.0/doc/pydoc/simpleparse.tests.test_common_strings.html
--- simpleparse-2.1.0a1/doc/pydoc/simpleparse.tests.test_common_strings.html 2006-02-19 01:05:16.000000000 +0000
+++ simpleparse-2.2.0/doc/pydoc/simpleparse.tests.test_common_strings.html 1970-01-01 00:00:00.000000000 +0000
@@ -1,197 +0,0 @@
    -Python: module simpleparse.tests.test_common_strings
     
    - 
    simpleparse.tests.test_common_strings
    index
    /home/mcfletch/pylive/simpleparse/tests/test_common_strings.py
    -

    -

    - - - - - -
     
    -Modules
           
    simpleparse.dispatchprocessor
    -
    string
    -
    simpleparse.common.strings
    -
    unittest
    -

    - - - - - -
     
    -Classes
           
    -
    TestCase(object) -
    -
    -
    CommonTests -
    -
    -
    -

    - - - - - -
     
    -class CommonTests(TestCase)
        
    Method resolution order:
    -
    CommonTests
    -
    TestCase
    -
    object
    -
    -
    -Methods defined here:
    -
    testBasic(self)
    - -
    -Methods inherited from TestCase:
    -
    __call__(self, *args, **kwds)
    - -
    __init__(self, methodName='runTest')
    Create an instance of the class that will use the named test
    -method when executed. Raises a ValueError if the instance does
    -not have a method with the specified name.
    - -
    __repr__(self)
    - -
    __str__(self)
    - -
    assertAlmostEqual = failUnlessAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are unequal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most signficant digit).
    - -
    assertAlmostEquals = failUnlessAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are unequal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most signficant digit).
    - -
    assertEqual = failUnlessEqual(self, first, second, msg=None)
    Fail if the two objects are unequal as determined by the '=='
    -operator.
    - -
    assertEquals = failUnlessEqual(self, first, second, msg=None)
    Fail if the two objects are unequal as determined by the '=='
    -operator.
    - -
    assertFalse = failIf(self, expr, msg=None)
    Fail the test if the expression is true.
    - -
    assertNotAlmostEqual = failIfAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are equal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most signficant digit).
    - -
    assertNotAlmostEquals = failIfAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are equal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most signficant digit).
    - -
    assertNotEqual = failIfEqual(self, first, second, msg=None)
    Fail if the two objects are equal as determined by the '=='
    -operator.
    - -
    assertNotEquals = failIfEqual(self, first, second, msg=None)
    Fail if the two objects are equal as determined by the '=='
    -operator.
    - -
    assertRaises = failUnlessRaises(self, excClass, callableObj, *args, **kwargs)
    Fail unless an exception of class excClass is thrown
    -by callableObj when invoked with arguments args and keyword
    -arguments kwargs. If a different type of exception is
    -thrown, it will not be caught, and the test case will be
    -deemed to have suffered an error, exactly as for an
    -unexpected exception.
    - -
    assertTrue = failUnless(self, expr, msg=None)
    Fail the test unless the expression is true.
    - -
    assert_ = failUnless(self, expr, msg=None)
    Fail the test unless the expression is true.
    - -
    countTestCases(self)
    - -
    debug(self)
    Run the test without collecting errors in a TestResult
    - -
    defaultTestResult(self)
    - -
    fail(self, msg=None)
    Fail immediately, with the given message.
    - -
    failIf(self, expr, msg=None)
    Fail the test if the expression is true.
    - -
    failIfAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are equal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most signficant digit).
    - -
    failIfEqual(self, first, second, msg=None)
    Fail if the two objects are equal as determined by the '=='
    -operator.
    - -
    failUnless(self, expr, msg=None)
    Fail the test unless the expression is true.
    - -
    failUnlessAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are unequal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most signficant digit).
    - -
    failUnlessEqual(self, first, second, msg=None)
    Fail if the two objects are unequal as determined by the '=='
    -operator.
    - -
    failUnlessRaises(self, excClass, callableObj, *args, **kwargs)
    Fail unless an exception of class excClass is thrown
    -by callableObj when invoked with arguments args and keyword
    -arguments kwargs. If a different type of exception is
    -thrown, it will not be caught, and the test case will be
    -deemed to have suffered an error, exactly as for an
    -unexpected exception.
    - -
    id(self)
    - -
    run(self, result=None)
    - -
    setUp(self)
    Hook method for setting up the test fixture before exercising it.
    - -
    shortDescription(self)
    Returns a one-line description of the test, or None if no
    -description has been provided.

    -The default implementation of this method returns the first line of
    -the specified test method's docstring.
    - -
    tearDown(self)
    Hook method for deconstructing the test fixture after testing it.
    - -
    -Data and other attributes inherited from TestCase:
    -
    __dict__ = <dictproxy object>
    dictionary for instance variables (if defined)
    - -
    __weakref__ = <attribute '__weakref__' of 'TestCase' objects>
    list of weak references to the object (if defined)
    - -
    failureException = <class exceptions.AssertionError>
    Assertion failed.
    - -

    - - - - - -
     
    -Functions
           
    getSuite()
    -

    - - - - - -
     
    -Data
           parseTests = [('string_triple_single', ["'''this and that'''", r"'''this \''' '''", "''''''", r"''''\''''"], []), ('string_triple_double', ['"""this and that"""', r'"""this \""" """', '""""""', r'""""\""""'], []), ('string_double_quote', [r'"\p"', r'"\""'], []), ('string', ["'this'", '"that"', r'"\b\f\n\r"', r'"\x32\xff\xcf"', r'"\032\033\055\077"', r'"\t\v\\\a\b\f\n\r"', r'"\t"', r'"\v"', r'"\""'], [])]
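    The parseTests entries above exercise the string-literal productions from simpleparse.common.strings (string, string_triple_single, string_triple_double, string_double_quote). A sketch of matching one accepted sample (the one-line declaration is illustrative):

        from simpleparse.parser import Parser
        import simpleparse.common.strings     # registers the string-literal productions

        decl = '''root := string'''           # illustrative grammar using a shared production
        parser = Parser(decl, 'root')
        print parser.parse('"that"')          # '"that"' is one of the accepted samples above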
    -
\ No newline at end of file
diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.tests.test_deep_nesting.html simpleparse-2.2.0/doc/pydoc/simpleparse.tests.test_deep_nesting.html
--- simpleparse-2.1.0a1/doc/pydoc/simpleparse.tests.test_deep_nesting.html 2006-02-19 01:05:16.000000000 +0000
+++ simpleparse-2.2.0/doc/pydoc/simpleparse.tests.test_deep_nesting.html 1970-01-01 00:00:00.000000000 +0000
@@ -1,36 +0,0 @@
    -Python: module simpleparse.tests.test_deep_nesting
     
    - 
    simpleparse.tests.test_deep_nesting
    index
    /home/mcfletch/pylive/simpleparse/tests/test_deep_nesting.py
    -

    -

    - - - - - -
     
    -Modules
           
    simpleparse.stt.TextTools.TextTools
    -
    pprint
    -

    - - - - - -
     
    -Data
           AnyInt = <Any Integer>
    -NullResult = <Null Children>
    -declaration = "testparser := as?\nas := a,as?\na := 'a'\n"
    -expectedResult = (1, [('as', 0, 4, [('a', 0, 1, <Null Children>), ('as', 1, 4, [(...), (...)])])], 4)
    -parser = (('as', 204, ([(('as', 204, (...), 1, 1),), (('a', 21, 'a'), ('as', 204, (...), 1, 1)), ((None, 21, 'a'),)], 1), 1, 1),)
    -result = (1, [('as', 0, 4, [('a', 0, 1, None), ('as', 1, 4, [(...), (...)])])], 4)
    -testdata = 'aaaa'
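    The declaration above is deliberately right-recursive (as := a, as?), so parsing the four-character testdata produces the deeply nested structure shown in expectedResult. The module builds its tag table by hand, but the same parse can be sketched through the public Parser class (results may differ in minor reporting details):

        from simpleparse.parser import Parser

        declaration = "testparser := as?\nas := a,as?\na := 'a'\n"
        parser = Parser(declaration, 'testparser')

        # Expect success == 1 and next_char == 4, with each 'as' nesting another 'as'.
        success, children, next_char = parser.parse('aaaa')
        print success, next_char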
    -
\ No newline at end of file
diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.tests.test_erroronfail.html simpleparse-2.2.0/doc/pydoc/simpleparse.tests.test_erroronfail.html
--- simpleparse-2.1.0a1/doc/pydoc/simpleparse.tests.test_erroronfail.html 2006-02-19 01:05:16.000000000 +0000
+++ simpleparse-2.2.0/doc/pydoc/simpleparse.tests.test_erroronfail.html 1970-01-01 00:00:00.000000000 +0000
@@ -1,216 +0,0 @@
    -Python: module simpleparse.tests.test_erroronfail
     
    - 
    simpleparse.tests.test_erroronfail
    index
    /home/mcfletch/pylive/simpleparse/tests/test_erroronfail.py
    -

    -

    - - - - - -
     
    -Modules
           
    pprint
    -
    unittest
    -

    - - - - - -
     
    -Classes
           
    -
    TestCase(object) -
    -
    -
    ErrorOnFailTests -
    -
    -
    -

    - - - - - - - -
     
    -class ErrorOnFailTests(TestCase)
   Tests of the error-on-fail mechanisms
     
     
    Method resolution order:
    -
    ErrorOnFailTests
    -
    TestCase
    -
    object
    -
    -
    -Methods defined here:
    -
    shouldNotRaise(self, definition, parserName, testValue)
    - -
    shouldRaise(self, definition, parserName, testValue)
    - -
    testErrorOnFail1(self)
    - -
    testErrorOnFail10(self)
    Test for use of setting message in definition
    - -
    testErrorOnFail11(self)
    Test proper setting of err message text from !"message" syntax
    - -
    testErrorOnFail12(self)
    Test proper setting of err message text from !"message" syntax
    - -
    testErrorOnFail2(self)
    - -
    testErrorOnFail3(self)
    - -
    testErrorOnFail4(self)
    - -
    testErrorOnFail5(self)
    - -
    testErrorOnFail6(self)
    - -
    testErrorOnFail7(self)
    - -
    testErrorOnFail8(self)
    - -
    testErrorOnFail9(self)
    - -
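    The numbered tests above exercise the error-on-fail marker in declarations: once the '!' cut is reached in a sequence, a mismatch is reported as a parsing error instead of a silent backtrack, and !"message" attaches a custom message. A rough sketch of the idea (the declaration itself is an illustrative assumption patterned on the !"message" syntax named in the docstrings above):

        from simpleparse.parser import Parser

        # After 'a' matches, the '!' cut makes the rest of the sequence mandatory.
        decl = '''s := 'a', !"expected b after a", 'b' '''
        parser = Parser(decl, 's')

        print parser.parse('ab')        # matches normally
        try:
            parser.parse('ac')          # 'b' missing after the cut: error, not backtracking
        except Exception, err:          # simpleparse raises a parser error here
            print 'raised:', err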
    -Methods inherited from TestCase:
    -
    __call__(self, *args, **kwds)
    - -
    __init__(self, methodName='runTest')
    Create an instance of the class that will use the named test
    -method when executed. Raises a ValueError if the instance does
    -not have a method with the specified name.
    - -
    __repr__(self)
    - -
    __str__(self)
    - -
    assertAlmostEqual = failUnlessAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are unequal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most signficant digit).
    - -
    assertAlmostEquals = failUnlessAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are unequal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most signficant digit).
    - -
    assertEqual = failUnlessEqual(self, first, second, msg=None)
    Fail if the two objects are unequal as determined by the '=='
    -operator.
    - -
    assertEquals = failUnlessEqual(self, first, second, msg=None)
    Fail if the two objects are unequal as determined by the '=='
    -operator.
    - -
    assertFalse = failIf(self, expr, msg=None)
    Fail the test if the expression is true.
    - -
    assertNotAlmostEqual = failIfAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are equal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most signficant digit).
    - -
    assertNotAlmostEquals = failIfAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are equal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most signficant digit).
    - -
    assertNotEqual = failIfEqual(self, first, second, msg=None)
    Fail if the two objects are equal as determined by the '=='
    -operator.
    - -
    assertNotEquals = failIfEqual(self, first, second, msg=None)
    Fail if the two objects are equal as determined by the '=='
    -operator.
    - -
    assertRaises = failUnlessRaises(self, excClass, callableObj, *args, **kwargs)
    Fail unless an exception of class excClass is thrown
    -by callableObj when invoked with arguments args and keyword
    -arguments kwargs. If a different type of exception is
    -thrown, it will not be caught, and the test case will be
    -deemed to have suffered an error, exactly as for an
    -unexpected exception.
    - -
    assertTrue = failUnless(self, expr, msg=None)
    Fail the test unless the expression is true.
    - -
    assert_ = failUnless(self, expr, msg=None)
    Fail the test unless the expression is true.
    - -
    countTestCases(self)
    - -
    debug(self)
    Run the test without collecting errors in a TestResult
    - -
    defaultTestResult(self)
    - -
    fail(self, msg=None)
    Fail immediately, with the given message.
    - -
    failIf(self, expr, msg=None)
    Fail the test if the expression is true.
    - -
    failIfAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are equal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most signficant digit).
    - -
    failIfEqual(self, first, second, msg=None)
    Fail if the two objects are equal as determined by the '=='
    -operator.
    - -
    failUnless(self, expr, msg=None)
    Fail the test unless the expression is true.
    - -
    failUnlessAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are unequal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most signficant digit).
    - -
    failUnlessEqual(self, first, second, msg=None)
    Fail if the two objects are unequal as determined by the '=='
    -operator.
    - -
    failUnlessRaises(self, excClass, callableObj, *args, **kwargs)
    Fail unless an exception of class excClass is thrown
    -by callableObj when invoked with arguments args and keyword
    -arguments kwargs. If a different type of exception is
    -thrown, it will not be caught, and the test case will be
    -deemed to have suffered an error, exactly as for an
    -unexpected exception.
    - -
    id(self)
    - -
    run(self, result=None)
    - -
    setUp(self)
    Hook method for setting up the test fixture before exercising it.
    - -
    shortDescription(self)
    Returns a one-line description of the test, or None if no
    -description has been provided.

    -The default implementation of this method returns the first line of
    -the specified test method's docstring.
    - -
    tearDown(self)
    Hook method for deconstructing the test fixture after testing it.
    - -
    -Data and other attributes inherited from TestCase:
    -
    __dict__ = <dictproxy object>
    dictionary for instance variables (if defined)
    - -
    __weakref__ = <attribute '__weakref__' of 'TestCase' objects>
    list of weak references to the object (if defined)
    - -
    failureException = <class exceptions.AssertionError>
    Assertion failed.
    - -

    - - - - - -
     
    -Functions
           
    getSuite()
    -
    -
\ No newline at end of file
diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.tests.test_grammarparser.html simpleparse-2.2.0/doc/pydoc/simpleparse.tests.test_grammarparser.html
--- simpleparse-2.1.0a1/doc/pydoc/simpleparse.tests.test_grammarparser.html 2006-02-19 01:05:17.000000000 +0000
+++ simpleparse-2.2.0/doc/pydoc/simpleparse.tests.test_grammarparser.html 1970-01-01 00:00:00.000000000 +0000
@@ -1,714 +0,0 @@
    -Python: module simpleparse.tests.test_grammarparser
     
    - 
    simpleparse.tests.test_grammarparser
    index
    /home/mcfletch/pylive/simpleparse/tests/test_grammarparser.py
    -

    Tests that simpleparsegrammar does parse SimpleParse grammars

    -

    - - - - - -
     
    -Modules
           
    simpleparse.stt.TextTools.TextTools
    -
    pprint
    -
    unittest
    -

    - - - - - -
     
    -Classes
           
    -
    TestCase(object) -
    -
    -
    SimpleParseGrammarTests -
    -
    -
    SimpleParseRecursiveTests -
    -
    -
    -
    -
    -

    - - - - - - - -
     
    -class SimpleParseGrammarTests(TestCase)
   Test parsing of the simpleparse grammar elements
     
     
    Method resolution order:
    -
    SimpleParseGrammarTests
    -
    TestCase
    -
    object
    -
    -
    -Methods defined here:
    -
    doBasicTest(self, parserName, testValue, expected)
    - -
    testChar1(self)
    - -
    testChar2(self)
    - -
    testChar3(self)
    - -
    testChar4(self)
    - -
    testChar5(self)
    - -
    testChar6(self)
    - -
    testChar7(self)
    - -
    testChar8(self)
    - -
    testChar85(self)
    - -
    testChar9(self)
    - -
    testCharNoBrace1(self)
    - -
    testCharRange1(self)
    - -
    testComment1(self)
    - -
    testComment2(self)
    - -
    testCutToken2(self)
    - -
    testCutToken3(self)
    - -
    testCutToken4(self)
    - -
    testDeclaration(self)
    - -
    testDeclaration2(self)
    - -
    testDeclaration3(self)
    - -
    testDeclaration4(self)
    - -
    testDeclaration5(self)
    - -
    testDeclarationSet1(self)
    - -
    testDeclarationSet2(self)
    Just tries to parse and sees that everything was parsed, doesn't predict the result
    - -
    testElementToken1(self)
    - -
    testElementToken2(self)
    - -
    testElementToken3(self)
    - -
    testElementToken4(self)
    - -
    testElementToken5(self)
    - -
    testElementToken6(self)
    Lookahead indicator with positive
    - -
    testElementToken7(self)
    Lookahead indicator with negative
    - -
    testElementToken8(self)
    Error on fail indicator
    - -
    testElementToken9(self)
    Error on fail indicator with message
    - -
    testErrorOnFailFlag1(self)
    - -
    testExpandedName1(self)
    - -
    testExpandedName2(self)
    - -
    testFOGroup1(self)
    - -
    testGroup1(self)
    - -
    testGroup2(self)
    - -
    testGroup3(self)
    Test group with sequential added group
    -Note that this test also serves to test
    -the function of non-reporting names
    - -
    testGroup4(self)
    Test group with sequential added group
    -Note that this test also serves to test
    -the function of non-reporting names
    - -
    testGroup5(self)
    Test group with sequential added group
    -Note that this test also serves to test
    -the function of non-reporting names
    - -
    testGroup6(self)
    Test group with multiple / 'd values
    - -
    testLiteral1(self)
    - -
    testLiteral2(self)
    - -
    testLiteral3(self)
    - -
    testLiteral4(self)
    - -
    testLiteral5(self)
    - -
    testLiteral6(self)
    - -
    testLiteralDecorator(self)
    - -
    testLiteralDecorator2(self)
    - -
    testLiteralDecorator3(self)
    Decorator must be right next to literal, no whitespace
    - -
    testLookAheadIndicator1(self)
    - -
    testLookAheadIndicator2(self)
    - -
    testName1(self)
    - -
    testName2(self)
    - -
    testName3(self)
    - -
    testNegposIndicator1(self)
    - -
    testNegposIndicator2(self)
    - -
    testOccurenceIndicator1(self)
    - -
    testOccurenceIndicator2(self)
    - -
    testOccurenceIndicator3(self)
    - -
    testOccurenceIndicator4(self)
    - -
    testOccurenceIndicator5(self)
    - -
    testRange1(self)
    - -
    testRange2(self)
    - -
    testRange3(self)
    - -
    testRange4(self)
    Test optional repeating children running into eof

    -Original SimpleParse had a major failure here,
    -system hung trying to parse the [] string.  Basically,
    -there was no check for EOF during a repeating-item
    -parse (save for literals and character sets), so you
    -wound up with infinite loops.
    - -
    testRange5(self)
    Test optional repeating children with no termination

    -Original SimpleParse had a major failure here,
    -system hung trying to parse the [] string.  Basically,
    -there was no check for EOF during a repeating-item
    -parse (save for literals and character sets), so you
    -wound up with infinite loops.
    - -
    testSEQGroup1(self)
    - -
    testSEQToken1(self)
    - -
    testSeqGroup2(self)
    - -
    testUnreportedName1(self)
    - -
    testUnreportedName2(self)
    - -
    testWhitespace1(self)
    - -
    testWhitespace2(self)
    - -
    testWhitespace3(self)
    - -
    testWhitespace4(self)
    - -
    testWhitespace5(self)
    Bug in 2.0.0 where Null comments such as:
    -                "#
    -"

    -                didn't parse.
    - -
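    The test methods above each target one construct of the SimpleParse declaration syntax: character ranges, literals and their decorators, sequential and first-of groups, occurrence and lookahead indicators, comments, whitespace handling, and so on. A small illustrative declaration touching several of those constructs (the grammar is made up for this sketch, not taken from the tests):

        from simpleparse.parser import Parser

        decl = r'''
        # comments are allowed inside declarations
        word     := [a-zA-Z]+                 # character range, '+' occurrence indicator
        number   := [0-9]+
        value    := word / number             # first-of (alternation) group
        ws       := [ \t\n]*                  # optional repeating whitespace
        pair     := word, ws, '=', ws, value  # sequential group with a literal
        root     := (pair, ws)+
        '''
        parser = Parser(decl, 'root')
        print parser.parse('alpha = 42 beta = gamma ')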
    -Methods inherited from TestCase:
    -
    __call__(self, *args, **kwds)
    - -
    __init__(self, methodName='runTest')
    Create an instance of the class that will use the named test
    -method when executed. Raises a ValueError if the instance does
    -not have a method with the specified name.
    - -
    __repr__(self)
    - -
    __str__(self)
    - -
    assertAlmostEqual = failUnlessAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are unequal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most signficant digit).
    - -
    assertAlmostEquals = failUnlessAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are unequal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most signficant digit).
    - -
    assertEqual = failUnlessEqual(self, first, second, msg=None)
    Fail if the two objects are unequal as determined by the '=='
    -operator.
    - -
    assertEquals = failUnlessEqual(self, first, second, msg=None)
    Fail if the two objects are unequal as determined by the '=='
    -operator.
    - -
    assertFalse = failIf(self, expr, msg=None)
    Fail the test if the expression is true.
    - -
    assertNotAlmostEqual = failIfAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are equal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most signficant digit).
    - -
    assertNotAlmostEquals = failIfAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are equal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most signficant digit).
    - -
    assertNotEqual = failIfEqual(self, first, second, msg=None)
    Fail if the two objects are equal as determined by the '=='
    -operator.
    - -
    assertNotEquals = failIfEqual(self, first, second, msg=None)
    Fail if the two objects are equal as determined by the '=='
    -operator.
    - -
    assertRaises = failUnlessRaises(self, excClass, callableObj, *args, **kwargs)
    Fail unless an exception of class excClass is thrown
    -by callableObj when invoked with arguments args and keyword
    -arguments kwargs. If a different type of exception is
    -thrown, it will not be caught, and the test case will be
    -deemed to have suffered an error, exactly as for an
    -unexpected exception.
    - -
    assertTrue = failUnless(self, expr, msg=None)
    Fail the test unless the expression is true.
    - -
    assert_ = failUnless(self, expr, msg=None)
    Fail the test unless the expression is true.
    - -
    countTestCases(self)
    - -
    debug(self)
    Run the test without collecting errors in a TestResult
    - -
    defaultTestResult(self)
    - -
    fail(self, msg=None)
    Fail immediately, with the given message.
    - -
    failIf(self, expr, msg=None)
    Fail the test if the expression is true.
    - -
    failIfAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are equal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most signficant digit).
    - -
    failIfEqual(self, first, second, msg=None)
    Fail if the two objects are equal as determined by the '=='
    -operator.
    - -
    failUnless(self, expr, msg=None)
    Fail the test unless the expression is true.
    - -
    failUnlessAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are unequal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most signficant digit).
    - -
    failUnlessEqual(self, first, second, msg=None)
    Fail if the two objects are unequal as determined by the '=='
    -operator.
    - -
    failUnlessRaises(self, excClass, callableObj, *args, **kwargs)
    Fail unless an exception of class excClass is thrown
    -by callableObj when invoked with arguments args and keyword
    -arguments kwargs. If a different type of exception is
    -thrown, it will not be caught, and the test case will be
    -deemed to have suffered an error, exactly as for an
    -unexpected exception.
    - -
    id(self)
    - -
    run(self, result=None)
    - -
    setUp(self)
    Hook method for setting up the test fixture before exercising it.
    - -
    shortDescription(self)
    Returns a one-line description of the test, or None if no
    -description has been provided.

    -The default implementation of this method returns the first line of
    -the specified test method's docstring.
    - -
    tearDown(self)
    Hook method for deconstructing the test fixture after testing it.
    - -
    -Data and other attributes inherited from TestCase:
    -
    __dict__ = <dictproxy object>
    dictionary for instance variables (if defined)
    - -
    __weakref__ = <attribute '__weakref__' of 'TestCase' objects>
    list of weak references to the object (if defined)
    - -
    failureException = <class exceptions.AssertionError>
    Assertion failed.
    - -

    - - - - - - - -
     
    -class SimpleParseRecursiveTests(SimpleParseGrammarTests)
       Test parsing of grammar elements with generated version of simpleparse grammar
     
     
    Method resolution order:
    -
    SimpleParseRecursiveTests
    -
    SimpleParseGrammarTests
    -
    TestCase
    -
    object
    -
    -
    -Methods defined here:
    -
    doBasicTest(self, parserName, testValue, expected)
    - -
    -Methods inherited from SimpleParseGrammarTests:
    -
    testChar1(self)
    - -
    testChar2(self)
    - -
    testChar3(self)
    - -
    testChar4(self)
    - -
    testChar5(self)
    - -
    testChar6(self)
    - -
    testChar7(self)
    - -
    testChar8(self)
    - -
    testChar85(self)
    - -
    testChar9(self)
    - -
    testCharNoBrace1(self)
    - -
    testCharRange1(self)
    - -
    testComment1(self)
    - -
    testComment2(self)
    - -
    testCutToken2(self)
    - -
    testCutToken3(self)
    - -
    testCutToken4(self)
    - -
    testDeclaration(self)
    - -
    testDeclaration2(self)
    - -
    testDeclaration3(self)
    - -
    testDeclaration4(self)
    - -
    testDeclaration5(self)
    - -
    testDeclarationSet1(self)
    - -
    testDeclarationSet2(self)
    Just tries to parse and sees that everything was parsed, doesn't predict the result
    - -
    testElementToken1(self)
    - -
    testElementToken2(self)
    - -
    testElementToken3(self)
    - -
    testElementToken4(self)
    - -
    testElementToken5(self)
    - -
    testElementToken6(self)
    Lookahead indicator with positive
    - -
    testElementToken7(self)
    Lookahead indicator with negative
    - -
    testElementToken8(self)
    Error on fail indicator
    - -
    testElementToken9(self)
    Error on fail indicator with message
    - -
    testErrorOnFailFlag1(self)
    - -
    testExpandedName1(self)
    - -
    testExpandedName2(self)
    - -
    testFOGroup1(self)
    - -
    testGroup1(self)
    - -
    testGroup2(self)
    - -
    testGroup3(self)
    Test group with sequential added group
    -Note that this test also serves to test
    -the function of non-reporting names
    - -
    testGroup4(self)
    Test group with sequential added group
    -Note that this test also serves to test
    -the function of non-reporting names
    - -
    testGroup5(self)
    Test group with sequential added group
    -Note that this test also serves to test
    -the function of non-reporting names
    - -
    testGroup6(self)
    Test group with multiple / 'd values
    - -
    testLiteral1(self)
    - -
    testLiteral2(self)
    - -
    testLiteral3(self)
    - -
    testLiteral4(self)
    - -
    testLiteral5(self)
    - -
    testLiteral6(self)
    - -
    testLiteralDecorator(self)
    - -
    testLiteralDecorator2(self)
    - -
    testLiteralDecorator3(self)
    Decorator must be right next to literal, no whitespace
    - -
    testLookAheadIndicator1(self)
    - -
    testLookAheadIndicator2(self)
    - -
    testName1(self)
    - -
    testName2(self)
    - -
    testName3(self)
    - -
    testNegposIndicator1(self)
    - -
    testNegposIndicator2(self)
    - -
    testOccurenceIndicator1(self)
    - -
    testOccurenceIndicator2(self)
    - -
    testOccurenceIndicator3(self)
    - -
    testOccurenceIndicator4(self)
    - -
    testOccurenceIndicator5(self)
    - -
    testRange1(self)
    - -
    testRange2(self)
    - -
    testRange3(self)
    - -
    testRange4(self)
    Test optional repeating children running into eof

    -Original SimpleParse had a major failure here,
    -system hung trying to parse the [] string.  Basically,
    -there was no check for EOF during a repeating-item
    -parse (save for literals and character sets), so you
    -wound up with infinite loops.
    - -
    testRange5(self)
    Test optional repeating children with no termination

    -Original SimpleParse had a major failure here,
    -system hung trying to parse the [] string.  Basically,
    -there was no check for EOF during a repeating-item
    -parse (save for literals and character sets), so you
    -wound up with infinite loops.
    - -
    testSEQGroup1(self)
    - -
    testSEQToken1(self)
    - -
    testSeqGroup2(self)
    - -
    testUnreportedName1(self)
    - -
    testUnreportedName2(self)
    - -
    testWhitespace1(self)
    - -
    testWhitespace2(self)
    - -
    testWhitespace3(self)
    - -
    testWhitespace4(self)
    - -
    testWhitespace5(self)
    Bug in 2.0.0 where Null comments such as "#\n" didn't parse.
    - -
    -Methods inherited from TestCase:
    -
    __call__(self, *args, **kwds)
    - -
    __init__(self, methodName='runTest')
    Create an instance of the class that will use the named test
    -method when executed. Raises a ValueError if the instance does
    -not have a method with the specified name.
    - -
    __repr__(self)
    - -
    __str__(self)
    - -
    assertAlmostEqual = failUnlessAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are unequal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most significant digit).
    - -
    assertAlmostEquals = failUnlessAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are unequal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most significant digit).
    - -
    assertEqual = failUnlessEqual(self, first, second, msg=None)
    Fail if the two objects are unequal as determined by the '=='
    -operator.
    - -
    assertEquals = failUnlessEqual(self, first, second, msg=None)
    Fail if the two objects are unequal as determined by the '=='
    -operator.
    - -
    assertFalse = failIf(self, expr, msg=None)
    Fail the test if the expression is true.
    - -
    assertNotAlmostEqual = failIfAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are equal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most significant digit).
    - -
    assertNotAlmostEquals = failIfAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are equal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most significant digit).
    - -
    assertNotEqual = failIfEqual(self, first, second, msg=None)
    Fail if the two objects are equal as determined by the '=='
    -operator.
    - -
    assertNotEquals = failIfEqual(self, first, second, msg=None)
    Fail if the two objects are equal as determined by the '=='
    -operator.
    - -
    assertRaises = failUnlessRaises(self, excClass, callableObj, *args, **kwargs)
    Fail unless an exception of class excClass is thrown
    -by callableObj when invoked with arguments args and keyword
    -arguments kwargs. If a different type of exception is
    -thrown, it will not be caught, and the test case will be
    -deemed to have suffered an error, exactly as for an
    -unexpected exception.
    - -
    assertTrue = failUnless(self, expr, msg=None)
    Fail the test unless the expression is true.
    - -
    assert_ = failUnless(self, expr, msg=None)
    Fail the test unless the expression is true.
    - -
    countTestCases(self)
    - -
    debug(self)
    Run the test without collecting errors in a TestResult
    - -
    defaultTestResult(self)
    - -
    fail(self, msg=None)
    Fail immediately, with the given message.
    - -
    failIf(self, expr, msg=None)
    Fail the test if the expression is true.
    - -
    failIfAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are equal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most significant digit).
    - -
    failIfEqual(self, first, second, msg=None)
    Fail if the two objects are equal as determined by the '=='
    -operator.
    - -
    failUnless(self, expr, msg=None)
    Fail the test unless the expression is true.
    - -
    failUnlessAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are unequal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most significant digit).
    - -
    failUnlessEqual(self, first, second, msg=None)
    Fail if the two objects are unequal as determined by the '=='
    -operator.
    - -
    failUnlessRaises(self, excClass, callableObj, *args, **kwargs)
    Fail unless an exception of class excClass is thrown
    -by callableObj when invoked with arguments args and keyword
    -arguments kwargs. If a different type of exception is
    -thrown, it will not be caught, and the test case will be
    -deemed to have suffered an error, exactly as for an
    -unexpected exception.
    - -
    id(self)
    - -
    run(self, result=None)
    - -
    setUp(self)
    Hook method for setting up the test fixture before exercising it.
    - -
    shortDescription(self)
    Returns a one-line description of the test, or None if no
    -description has been provided.

    -The default implementation of this method returns the first line of
    -the specified test method's docstring.
    - -
    tearDown(self)
    Hook method for deconstructing the test fixture after testing it.
    - -
    -Data and other attributes inherited from TestCase:
    -
    __dict__ = <dictproxy object>
    dictionary for instance variables (if defined)
    - -
    __weakref__ = <attribute '__weakref__' of 'TestCase' objects>
    list of weak references to the object (if defined)
    - -
    failureException = <class exceptions.AssertionError>
    Assertion failed.
    - -

    -Functions
           
    getSuite()
    -

    -Data
           AnyInt = <Any Integer>
    -NullResult = <Null Children>
    -SPGenerator = <simpleparse.generator.Generator instance>
    -declaration = 'declarationset := declaration+\ndeclaration...,[0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F]\n'
    -recursiveParser = <simpleparse.parser.Parser instance>
    -
    \ No newline at end of file
    diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.tests.test.html simpleparse-2.2.0/doc/pydoc/simpleparse.tests.test.html
    --- simpleparse-2.1.0a1/doc/pydoc/simpleparse.tests.test.html  2006-02-19 01:05:16.000000000 +0000
    +++ simpleparse-2.2.0/doc/pydoc/simpleparse.tests.test.html  1970-01-01 00:00:00.000000000 +0000
    @@ -1,51 +0,0 @@
    -Python: module simpleparse.tests.test
     
    - 
    -simpleparse.tests.test
    -/home/mcfletch/pylive/simpleparse/tests/test.py

    -

    -Modules
           
    simpleparse.stt.TextTools
    -simpleparse.tests.mx_test
    -string
    -sys
    -
    simpleparse.tests.test_common_chartypes
    -simpleparse.tests.test_common_comments
    -simpleparse.tests.test_common_iso_date
    -simpleparse.tests.test_common_numbers
    -
    simpleparse.tests.test_common_strings
    -simpleparse.tests.test_objectgenerator
    -simpleparse.tests.test_optimisation
    -simpleparse.tests.test_printers
    -
    simpleparse.tests.test_simpleparsegrammar
    -simpleparse.tests.test_xml
    -unittest
    -

    -Functions
           
    getSuite()
    -

    -Data
           mxVersion = ('2', '1', '0')
    -
    \ No newline at end of file
    diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.tests.test_objectgenerator.html simpleparse-2.2.0/doc/pydoc/simpleparse.tests.test_objectgenerator.html
    --- simpleparse-2.1.0a1/doc/pydoc/simpleparse.tests.test_objectgenerator.html  2006-02-19 01:05:17.000000000 +0000
    +++ simpleparse-2.2.0/doc/pydoc/simpleparse.tests.test_objectgenerator.html  1970-01-01 00:00:00.000000000 +0000
    @@ -1,515 +0,0 @@
    -Python: module simpleparse.tests.test_objectgenerator
     
    - 
    -simpleparse.tests.test_objectgenerator
    -/home/mcfletch/pylive/simpleparse/tests/test_objectgenerator.py

    -

    -Modules
           
    simpleparse.stt.TextTools.TextTools
    -copy
    -simpleparse.stt.TextTools.mxTextTools.mxTextTools
    -
    pprint
    -string
    -time
    -
    traceback
    -types
    -unittest
    -

    -Classes
           
    -TestCase(object)
    -    ElementTokenTests

    -class ElementTokenTests(TestCase)
        
    Method resolution order:
    -ElementTokenTests
    -TestCase
    -object
    -Methods defined here:
    -
    doBasicTest(self, instance, testvalue, expected, startPosition=0)
    - -
    testCIString1(self)
    - -
    testCIString2(self)
    - -
    testCIString3(self)
    - -
    testCIString4(self)
    - -
    testCIString5(self)
    - -
    testFirstOf1(self)
    - -
    testFirstOf2(self)
    - -
    testFirstOf3(self)
    - -
    testFirstOf4(self)
    - -
    testFirstOf5(self)
    - -
    testFirstOf6(self)
    - -
    testNegative1(self)
    - -
    testNegative2(self)
    - -
    testNegative3(self)
    - -
    testNegative4(self)
    - -
    testNegative4a(self)
    - -
    testNegative4b(self)
    - -
    testNegative5(self)
    - -
    testNegative6(self)
    - -
    testNegative7(self)
    - -
    testRange1(self)
    - -
    testRange2(self)
    - -
    testRange3(self)
    - -
    testRange4(self)
    - -
    testRange5(self)
    - -
    testRange6(self)
    - -
    testRange7(self)
    - -
    testRange8(self)
    - -
    testRange9(self)
    - -
    testSequential1(self)
    - -
    testSequential2(self)
    - -
    testSequential3(self)
    - -
    testSequential4(self)
    - -
    testSequential5(self)
    - -
    testSequential6(self)
    - -
    testSequential7(self)
    - -
    testString1(self)
    - -
    testString2(self)
    - -
    testString3(self)
    - -
    testString4(self)
    - -
    testString5(self)
    - -
    testString6(self)
    - -
    testString7(self)
    - -
    testString8(self)
    Test repeating negative string
    - -
    testString9(self)
    - -
    -Methods inherited from TestCase:
    -
    __call__(self, *args, **kwds)
    - -
    __init__(self, methodName='runTest')
    Create an instance of the class that will use the named test
    -method when executed. Raises a ValueError if the instance does
    -not have a method with the specified name.
    - -
    __repr__(self)
    - -
    __str__(self)
    - -
    assertAlmostEqual = failUnlessAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are unequal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most significant digit).
    - -
    assertAlmostEquals = failUnlessAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are unequal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most significant digit).
    - -
    assertEqual = failUnlessEqual(self, first, second, msg=None)
    Fail if the two objects are unequal as determined by the '=='
    -operator.
    - -
    assertEquals = failUnlessEqual(self, first, second, msg=None)
    Fail if the two objects are unequal as determined by the '=='
    -operator.
    - -
    assertFalse = failIf(self, expr, msg=None)
    Fail the test if the expression is true.
    - -
    assertNotAlmostEqual = failIfAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are equal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most significant digit).
    - -
    assertNotAlmostEquals = failIfAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are equal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most significant digit).
    - -
    assertNotEqual = failIfEqual(self, first, second, msg=None)
    Fail if the two objects are equal as determined by the '=='
    -operator.
    - -
    assertNotEquals = failIfEqual(self, first, second, msg=None)
    Fail if the two objects are equal as determined by the '=='
    -operator.
    - -
    assertRaises = failUnlessRaises(self, excClass, callableObj, *args, **kwargs)
    Fail unless an exception of class excClass is thrown
    -by callableObj when invoked with arguments args and keyword
    -arguments kwargs. If a different type of exception is
    -thrown, it will not be caught, and the test case will be
    -deemed to have suffered an error, exactly as for an
    -unexpected exception.
    - -
    assertTrue = failUnless(self, expr, msg=None)
    Fail the test unless the expression is true.
    - -
    assert_ = failUnless(self, expr, msg=None)
    Fail the test unless the expression is true.
    - -
    countTestCases(self)
    - -
    debug(self)
    Run the test without collecting errors in a TestResult
    - -
    defaultTestResult(self)
    - -
    fail(self, msg=None)
    Fail immediately, with the given message.
    - -
    failIf(self, expr, msg=None)
    Fail the test if the expression is true.
    - -
    failIfAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are equal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most significant digit).
    - -
    failIfEqual(self, first, second, msg=None)
    Fail if the two objects are equal as determined by the '=='
    -operator.
    - -
    failUnless(self, expr, msg=None)
    Fail the test unless the expression is true.
    - -
    failUnlessAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are unequal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most significant digit).
    - -
    failUnlessEqual(self, first, second, msg=None)
    Fail if the two objects are unequal as determined by the '=='
    -operator.
    - -
    failUnlessRaises(self, excClass, callableObj, *args, **kwargs)
    Fail unless an exception of class excClass is thrown
    -by callableObj when invoked with arguments args and keyword
    -arguments kwargs. If a different type of exception is
    -thrown, it will not be caught, and the test case will be
    -deemed to have suffered an error, exactly as for an
    -unexpected exception.
    - -
    id(self)
    - -
    run(self, result=None)
    - -
    setUp(self)
    Hook method for setting up the test fixture before exercising it.
    - -
    shortDescription(self)
    Returns a one-line description of the test, or None if no
    -description has been provided.

    -The default implementation of this method returns the first line of
    -the specified test method's docstring.
    - -
    tearDown(self)
    Hook method for deconstructing the test fixture after testing it.
    - -
    -Data and other attributes inherited from TestCase:
    -
    __dict__ = <dictproxy object>
    dictionary for instance variables (if defined)
    - -
    __weakref__ = <attribute '__weakref__' of 'TestCase' objects>
    list of weak references to the object (if defined)
    - -
    failureException = <class exceptions.AssertionError>
    Assertion failed.
    - -

    -Functions
           
    BMS = TextSearch(...)
    TextSearch(match[,translate=None,algorithm=default_algorithm])

    -Create a substring search object for the string match;
    -translate is an optional translate-string like the one used
    -in the module re.
    -
    CharSet(...)
    CharSet(definition)

    -Create a character set matching object from the string
    -
    FS = TextSearch(...)
    TextSearch(match[,translate=None,algorithm=default_algorithm])

    -Create a substring search object for the string match;
    -translate is an optional translate-string like the one used
    -in the module re.
    -
    FSType = TextSearch(...)
    TextSearch(match[,translate=None,algorithm=default_algorithm])

    -Create a substring search object for the string match;
    -translate is an optional translate-string like the one used
    -in the module re.
    -
    TagTable(...)
    TagTable(definition[,cachable=1])
    -
    TextSearch(...)
    TextSearch(match[,translate=None,algorithm=default_algorithm])

    -Create a substring search object for the string match;
    -translate is an optional translate-string like the one used
    -in the module re.
    -
    UnicodeTagTable(...)
    TagTable(definition[,cachable=1])
    -
    charsplit(...)
    charsplit(text,char,start=0,stop=len(text))

    -Split text[start:stop] into substrings at char and
    -return the result as list of strings.
    -
    cmp(...)
    cmp(a,b)

    -Compare two valid taglist tuples w/r to their slice
    -position; this is useful for sorting joinlists.
    -
    getSuite()
    -
    hex2str(...)
    hex2str(text)

    -Return text interpreted as two byte HEX values converted
    -to a string.
    -
    isascii(...)
    isascii(text,start=0,stop=len(text))

    -Return 1/0 depending on whether text only contains ASCII
    -characters.
    -
    join(...)
    join(joinlist,sep='',start=0,stop=len(joinlist))

    -Copy snippets from different strings together producing a
    -new string
    -The first argument must be a list of tuples or strings;
    -tuples must be of the form (string,l,r[,...]) and turn out
    -as string[l:r]
    -NOTE: the syntax used for negative slices is different
    -than the Python standard: -1 corresponds to the first
    -character *after* the string, e.g. ('Example',0,-1) gives
    -'Example' and not 'Exampl', like in Python
    -sep is an optional separator string, start and stop
    -define the slice of joinlist that is taken into account.
    -
    joinlist(...)
    joinlist(text,list,start=0,stop=len(text))

    -Takes a list of tuples (replacement,l,r,...) and produces
    -a taglist suitable for join() which creates a copy
    -of text where every slice [l:r] is replaced by the
    -given replacement
    -- the list must be sorted using cmp() as compare function
    -- it may not contain overlapping slices
    -- the slices may not contain negative indices
    -- if the taglist cannot contain overlapping slices, you can
    -  give this function the taglist produced by tag() directly
    -  (sorting is not needed, as the list will already be sorted)
    -- start and stop set the slice to work in, i.e. text[start:stop]
    -
    lower(...)
    lower(text)

    -Return text converted to lower case.
    -
    prefix(...)
    prefix(text,prefixes,start=0,stop=len(text)[,translate])

    -Looks at text[start:stop] and returns the first matching
    -prefix out of the tuple of strings given in prefixes.
    -If no prefix is found to be matching, None is returned.
    -The optional 256 char translate string is used to translate
    -the text prior to comparing it with the given prefixes.
    -
    set(...)
    set(string,logic=1)

    -Returns a character set for string: a bit encoded version
    -of the characters occurring in string.
    -- logic can be set to 0 if all characters *not* in string
    -  should go into the set
    -
    setfind(...)
    setfind(text,set,start=0,stop=len(text))

    -Find the first occurrence of any character from set in
    -text[start:stop]
    - set must be a string obtained with set()
    -DEPRECATED: use CharSet().search() instead.
    -
    setsplit(...)
    setsplit(text,set,start=0,stop=len(text))

    -Split text[start:stop] into substrings using set,
    -omitting the splitting parts and empty substrings.
    -set must be a string obtained from set()
    -DEPRECATED: use CharSet().split() instead.
    -
    setsplitx(...)
    setsplitx(text,set,start=0,stop=len(text))

    -Split text[start:stop] into substrings using set, so
    -that every second entry consists only of characters in set.
    -set must be a string obtained with set()
    -DEPRECATED: use CharSet().splitx() instead.
    -
    setstrip(...)
    setstrip(text,set,start=0,stop=len(text),mode=0)

    -Strip all characters in text[start:stop] appearing in set.
    -mode indicates where to strip (<0: left; =0: left and right;
    ->0: right). set must be a string obtained with set()
    -DEPRECATED: use CharSet().strip() instead.
    -
    splitat(...)
    splitat(text,char,nth=1,start=0,stop=len(text))

    -Split text[start:stop] into two substrings at the nth
    -occurrence of char and return the result as 2-tuple. If the
    -character is not found, the second string is empty. nth may
    -be negative: the search is then done from the right and the
    -first string is empty in case the character is not found.
    -
    str2hex(...)
    str2hex(text)

    -Return text converted to a string consisting of two byte
    -HEX values.
    -
    suffix(...)
    suffix(text,suffixes,start=0,stop=len(text)[,translate])

    -Looks at text[start:stop] and returns the first matching
    -suffix out of the tuple of strings given in suffixes.
    -If no suffix is found to be matching, None is returned.
    -The optional 256 char translate string is used to translate
    -the text prior to comparing it with the given suffixes.
    -
    tag(...)
    tag(text,tagtable,sliceleft=0,sliceright=len(text),taglist=[],context=None) 
    -Produce a tag list for a string, given a tag-table
    -- returns a tuple (success, taglist, nextindex)
    -- if taglist == None, then no taglist is created
    -
    upper(...)
    upper(text)

    -Return text converted to upper case.
    -
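    A minimal usage sketch for the mxTextTools functions listed above, driving
    tag() directly with a hand-written tag table. The table format and the
    expected values in the comments are assumptions based on this documentation,
    not captured output:

        from simpleparse.stt.TextTools.TextTools import tag, AllIn, a2z, number

        # Each tag-table entry is (tagobject, command, argument); AllIn consumes
        # a run of characters from the given set (a2z and number are the
        # character-set constants listed in the Data section below).
        table = (
            ("word",   AllIn, a2z),
            ("digits", AllIn, number),
        )

        success, taglist, next_pos = tag("abc123", table)
        # Expected shape: success == 1, next_pos == 6, and taglist entries of
        # the form (tagobject, left, right, subtags), e.g. ("word", 0, 3, None).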

    -Data
           A2Z = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    -A2Z_charset = <Character Set object for 'A-Z'>
    -A2Z_set = '\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -AllIn = 11
    -AllInCharSet = 41
    -AllInSet = 31
    -AllNotIn = 12
    -AnyInt = <Any Integer>
    -AppendMatch = 2048
    -AppendTagobj = 1024
    -AppendToTagobj = 512
    -BOYERMOORE = 0
    -Break = 0
    -Call = 201
    -CallArg = 202
    -CallTag = 256
    -EOF = 101
    -FASTSEARCH = 1
    -Fail = 100
    -Here = 1
    -Is = 13
    -IsIn = 14
    -IsInCharSet = 42
    -IsInSet = 32
    -IsNot = 15
    -IsNotIn = 15
    -Jump = 100
    -JumpTarget = 104
    -LookAhead = 4096
    -Loop = 205
    -LoopControl = 206
    -MatchFail = -1000000
    -MatchOk = 1000000
    -Move = 103
    -NoWord = 211
    -NullResult = <Null Children>
    -Reset = -1
    -Skip = 102
    -SubTable = 207
    -SubTableInList = 208
    -TRIVIAL = 2
    -Table = 203
    -TableInList = 204
    -ThisTable = 999
    -To = 0
    -ToBOF = 0
    -ToEOF = -1
    -Umlaute = '\xc4\xd6\xdc'
    -Umlaute_charset = <Character Set object for '\xc4\xd6\xdc'>
    -Word = 21
    -WordEnd = 23
    -WordStart = 22
    -a2z = 'abcdefghijklmnopqrstuvwxyz'
    -a2z_charset = <Character Set object for 'a-z'>
    -a2z_set = '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -alpha = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
    -alpha_charset = <Character Set object for 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'>
    -alpha_set = '\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -alphanumeric = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'
    -alphanumeric_charset = <Character Set object for 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'>
    -alphanumeric_set = '\x00\x00\x00\x00\x00\x00\xff\x03\xfe\xff\xff\x07\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -any = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./...\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'
    -any_charset = <Character Set object for '\x00-\xff'>
    -any_set = '\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff'
    -formfeed = '\x0c'
    -formfeed_charset = <Character Set object for '\x0c'>
    -german_alpha = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\xe4\xf6\xfc\xdf\xc4\xd6\xdc'
    -german_alpha_charset = <Character Set object for 'ABCDEFGHIJKLMNOPQRSTU...hijklmnopqrstuvwxyz\xe4\xf6\xfc\xdf\xc4\xd6\xdc'>
    -german_alpha_set = '\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00@\x90\x10\x00@\x10'
    -id2cmd = {-1000000: 'MatchFail', -1: 'ToEOF', 0: 'Fail/Jump', 1: 'Here', 11: 'AllIn', 12: 'AllNotIn', 13: 'Is', 14: 'IsIn', 15: 'IsNotIn', 21: 'Word', ...}
    -newline = '\r\n'
    -newline_charset = <Character Set object for '\r\n'>
    -newline_set = '\x00$\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -nonwhitespace_charset = <Character Set object for '^ \t\x0b\r\n\x0c'>
    -nonwhitespace_set = '\xff\xc1\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff'
    -number = '0123456789'
    -number_charset = <Character Set object for '0-9'>
    -number_set = '\x00\x00\x00\x00\x00\x00\xff\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -sFindWord = 213
    -sWordEnd = 212
    -sWordStart = 211
    -tagtable_cache = {(46912536021760, 0): <String Tag Table object>, (46912540134840, 0): <String Tag Table object>, (46912541410080, 0): <String Tag Table object>, (46912541454848, 0): <String Tag Table object>, (46912541455136, 0): <String Tag Table object>, (46912541455208, 0): <String Tag Table object>, (46912541489264, 0): <String Tag Table object>, (46912541566016, 0): <String Tag Table object>, (46912543903688, 0): <String Tag Table object>, (46912543908136, 0): <String Tag Table object>, ...}
    -to_lower = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./...\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'
    -to_upper = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./...\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'
    -umlaute = '\xe4\xf6\xfc\xdf'
    -umlaute_charset = <Character Set object for '\xe4\xf6\xfc\xdf'>
    -white = ' \t\x0b'
    -white_charset = <Character Set object for ' \t\x0b'>
    -white_set = '\x00\x02\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -whitespace = ' \t\x0b\r\n\x0c'
    -whitespace_charset = <Character Set object for ' \t\x0b\r\n\x0c'>
    -whitespace_set = '\x00&\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -
    \ No newline at end of file
    diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.tests.test_optimisation.html simpleparse-2.2.0/doc/pydoc/simpleparse.tests.test_optimisation.html
    --- simpleparse-2.1.0a1/doc/pydoc/simpleparse.tests.test_optimisation.html  2006-02-19 01:05:17.000000000 +0000
    +++ simpleparse-2.2.0/doc/pydoc/simpleparse.tests.test_optimisation.html  1970-01-01 00:00:00.000000000 +0000
    @@ -1,212 +0,0 @@
    -Python: module simpleparse.tests.test_optimisation
     
    - 
    -simpleparse.tests.test_optimisation
    -/home/mcfletch/pylive/simpleparse/tests/test_optimisation.py

    -

    -Modules
           
    pprint
    -
    simpleparse.printers
    -
    traceback
    -
    unittest
    -

    -Classes
           
    -TestCase(object)
    -    OptimisationTests

    -class OptimisationTests(TestCase)
        
    Method resolution order:
    -OptimisationTests
    -TestCase
    -object
    -Methods defined here:
    -
    testNameCollapseForSimple(self)
    Test that a name reference to a single-item reporting production avoids an extra table
    - -
    testNoReportPassDown(self)
    Test that a non-reporting production does not produce reporting sub-productions
    - -
    testTermCompression(self)
    Test that unreported productions are compressed

    -Term compression is basically an inlining of terminal
    -expressions into the calling table.  At the moment
    -the terminal expressions are all duplicated, which may
    -balloon the size of the grammar, not sure if this will
    -be an actual problem.  As written, this optimization
    -should provide a significant speed up, but there may
    -be even more of a speed up if we allow for sharing
    -the terminal tuples as well.

    -This:
    -        a:=b <b>:= -c* c:='this'
    -Should eventually compress to this:
    -        a := -'this'*
    - -
    testTermSharing(self)
    Test that shared terminal productions are using the same parser
    - -
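    To make the testTermCompression docstring above concrete, a minimal sketch
    of loading that grammar with simpleparse.parser.Parser. The sample input is
    illustrative and the exact result tree is an assumption; either way, the
    unreported <b> production should contribute no 'b' entries to the results:

        from simpleparse.parser import Parser

        declaration = '''
        a   := b
        <b> := -c*
        c   := 'this'
        '''
        parser = Parser(declaration, "a")

        # Whether or not the generator inlines (compresses) the terminal c into
        # the calling table, the reported match for 'a' should look the same.
        success, children, next_pos = parser.parse("spam and eggs")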
    -Methods inherited from TestCase:
    -
    __call__(self, *args, **kwds)
    - -
    __init__(self, methodName='runTest')
    Create an instance of the class that will use the named test
    -method when executed. Raises a ValueError if the instance does
    -not have a method with the specified name.
    - -
    __repr__(self)
    - -
    __str__(self)
    - -
    assertAlmostEqual = failUnlessAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are unequal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most significant digit).
    - -
    assertAlmostEquals = failUnlessAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are unequal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most significant digit).
    - -
    assertEqual = failUnlessEqual(self, first, second, msg=None)
    Fail if the two objects are unequal as determined by the '=='
    -operator.
    - -
    assertEquals = failUnlessEqual(self, first, second, msg=None)
    Fail if the two objects are unequal as determined by the '=='
    -operator.
    - -
    assertFalse = failIf(self, expr, msg=None)
    Fail the test if the expression is true.
    - -
    assertNotAlmostEqual = failIfAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are equal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most significant digit).
    - -
    assertNotAlmostEquals = failIfAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are equal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most significant digit).
    - -
    assertNotEqual = failIfEqual(self, first, second, msg=None)
    Fail if the two objects are equal as determined by the '=='
    -operator.
    - -
    assertNotEquals = failIfEqual(self, first, second, msg=None)
    Fail if the two objects are equal as determined by the '=='
    -operator.
    - -
    assertRaises = failUnlessRaises(self, excClass, callableObj, *args, **kwargs)
    Fail unless an exception of class excClass is thrown
    -by callableObj when invoked with arguments args and keyword
    -arguments kwargs. If a different type of exception is
    -thrown, it will not be caught, and the test case will be
    -deemed to have suffered an error, exactly as for an
    -unexpected exception.
    - -
    assertTrue = failUnless(self, expr, msg=None)
    Fail the test unless the expression is true.
    - -
    assert_ = failUnless(self, expr, msg=None)
    Fail the test unless the expression is true.
    - -
    countTestCases(self)
    - -
    debug(self)
    Run the test without collecting errors in a TestResult
    - -
    defaultTestResult(self)
    - -
    fail(self, msg=None)
    Fail immediately, with the given message.
    - -
    failIf(self, expr, msg=None)
    Fail the test if the expression is true.
    - -
    failIfAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are equal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most significant digit).
    - -
    failIfEqual(self, first, second, msg=None)
    Fail if the two objects are equal as determined by the '=='
    -operator.
    - -
    failUnless(self, expr, msg=None)
    Fail the test unless the expression is true.
    - -
    failUnlessAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are unequal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most significant digit).
    - -
    failUnlessEqual(self, first, second, msg=None)
    Fail if the two objects are unequal as determined by the '=='
    -operator.
    - -
    failUnlessRaises(self, excClass, callableObj, *args, **kwargs)
    Fail unless an exception of class excClass is thrown
    -by callableObj when invoked with arguments args and keyword
    -arguments kwargs. If a different type of exception is
    -thrown, it will not be caught, and the test case will be
    -deemed to have suffered an error, exactly as for an
    -unexpected exception.
    - -
    id(self)
    - -
    run(self, result=None)
    - -
    setUp(self)
    Hook method for setting up the test fixture before exercising it.
    - -
    shortDescription(self)
    Returns a one-line description of the test, or None if no
    -description has been provided.

    -The default implementation of this method returns the first line of
    -the specified test method's docstring.
    - -
    tearDown(self)
    Hook method for deconstructing the test fixture after testing it.
    - -
    -Data and other attributes inherited from TestCase:
    -
    __dict__ = <dictproxy object>
    dictionary for instance variables (if defined)
    - -
    __weakref__ = <attribute '__weakref__' of 'TestCase' objects>
    list of weak references to the object (if defined)
    - -
    failureException = <class exceptions.AssertionError>
    Assertion failed.
    - -

    -Functions
           
    _rcmp(item1, item2)
    -
    getSuite()
    -
    rcmp(table1, table2)
    Silly utility function to work around the text search object's lack of __cmp__
    -
    -
    \ No newline at end of file
    diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.tests.test_printers_garbage.html simpleparse-2.2.0/doc/pydoc/simpleparse.tests.test_printers_garbage.html
    --- simpleparse-2.1.0a1/doc/pydoc/simpleparse.tests.test_printers_garbage.html  2006-02-19 01:05:17.000000000 +0000
    +++ simpleparse-2.2.0/doc/pydoc/simpleparse.tests.test_printers_garbage.html  1970-01-01 00:00:00.000000000 +0000
    @@ -1,288 +0,0 @@
    -Python: module simpleparse.tests.test_printers_garbage
     
    - 
    -simpleparse.tests.test_printers_garbage
    -/home/mcfletch/pylive/simpleparse/tests/test_printers_garbage.pyc

    -

    -Modules
           
    copy
    -simpleparse.generator
    -
    simpleparse.stt.TextTools.mxTextTools.mxTextTools
    -string
    -
    time
    -types
    -

    -Classes
           
    -Parser

    -class Parser
       Mix-in class for simpleparse.parser.Parser which uses this GENERATOR
    -to build tagging tables.  You'll likely want to override __init__ to avoid
    -building a new parser from a grammar (or subclass BaseParser instead of
    -Parser)
     
     Methods defined here:
    -
    buildTagger(self, name=None, processor=None)
    Build the tag-table for parsing the EBNF for this parser
    - -
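    Per the class docstring above, a rough sketch of how such a
    printers-generated module is combined with BaseParser. The mix-in order and
    the 'declaration' production name are assumptions, and this particular
    generated module is test output that may not be importable outside the
    test run:

        from simpleparse.baseparser import BaseParser
        from simpleparse.tests import test_printers_garbage

        class RegeneratedParser(test_printers_garbage.Parser, BaseParser):
            '''Parse using the pre-generated tag tables; no grammar re-parsing.'''

        parser = RegeneratedParser()
        # buildTagger() comes from the generated mix-in, parse() from BaseParser.
        result = parser.parse("a := 'x'\n", production="declaration")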

    -Functions
           
    BMS = TextSearch(...)
    TextSearch(match[,translate=None,algorithm=default_algorithm])

    -Create a substring search object for the string match;
    -translate is an optional translate-string like the one used
    -in the module re.
    -
    CharSet(...)
    CharSet(definition)

    -Create a character set matching object from the string
    -
    FS = TextSearch(...)
    TextSearch(match[,translate=None,algorithm=default_algorithm])

    -Create a substring search object for the string match;
    -translate is an optional translate-string like the one used
    -in the module re.
    -
    FSType = TextSearch(...)
    TextSearch(match[,translate=None,algorithm=default_algorithm])

    -Create a substring search object for the string match;
    -translate is an optional translate-string like the one used
    -in the module re.
    -
    TagTable(...)
    TagTable(definition[,cachable=1])
    -
    TextSearch(...)
    TextSearch(match[,translate=None,algorithm=default_algorithm])

    -Create a substring search object for the string match;
    -translate is an optional translate-string like the one used
    -in the module re.
    -
    UnicodeTagTable(...)
    TagTable(definition[,cachable=1])
    -
    charsplit(...)
    charsplit(text,char,start=0,stop=len(text))

    -Split text[start:stop] into substrings at char and
    -return the result as list of strings.
    -
    cmp(...)
    cmp(a,b)

    -Compare two valid taglist tuples w/r to their slice
    -position; this is useful for sorting joinlists.
    -
    hex2str(...)
    hex2str(text)

    -Return text interpreted as two byte HEX values converted
    -to a string.
    -
    isascii(...)
    isascii(text,start=0,stop=len(text))

    -Return 1/0 depending on whether text only contains ASCII
    -characters.
    -
    join(...)
    join(joinlist,sep='',start=0,stop=len(joinlist))

    -Copy snippets from different strings together producing a
    -new string
    -The first argument must be a list of tuples or strings;
    -tuples must be of the form (string,l,r[,...]) and turn out
    -as string[l:r]
    -NOTE: the syntax used for negative slices is different
    -than the Python standard: -1 corresponds to the first
    -character *after* the string, e.g. ('Example',0,-1) gives
    -'Example' and not 'Exampl', like in Python
    -sep is an optional separator string, start and stop
    -define the slice of joinlist that is taken into account.
    -
    joinlist(...)
    joinlist(text,list,start=0,stop=len(text))

    -Takes a list of tuples (replacement,l,r,...) and produces
    -a taglist suitable for join() which creates a copy
    -of text where every slice [l:r] is replaced by the
    -given replacement
    -- the list must be sorted using cmp() as compare function
    -- it may not contain overlapping slices
    -- the slices may not contain negative indices
    -- if the taglist cannot contain overlapping slices, you can
    -  give this function the taglist produced by tag() directly
    -  (sorting is not needed, as the list will already be sorted)
    -- start and stop set the slice to work in, i.e. text[start:stop]
    -
    lower(...)
    lower(text)

    -Return text converted to lower case.
    -
    prefix(...)
    prefix(text,prefixes,start=0,stop=len(text)[,translate])

    -Looks at text[start:stop] and returns the first matching
    -prefix out of the tuple of strings given in prefixes.
    -If no prefix is found to be matching, None is returned.
    -The optional 256 char translate string is used to translate
    -the text prior to comparing it with the given prefixes.
    -
    set(...)
    set(string,logic=1)

    -Returns a character set for string: a bit encoded version
    -of the characters occurring in string.
    -- logic can be set to 0 if all characters *not* in string
    -  should go into the set
    -
    setfind(...)
    setfind(text,set,start=0,stop=len(text))

    -Find the first occurrence of any character from set in
    -text[start:stop]
    - set must be a string obtained with set()
    -DEPRECATED: use CharSet().search() instead.
    -
    setsplit(...)
    setsplit(text,set,start=0,stop=len(text))

    -Split text[start:stop] into substrings using set,
    -omitting the splitting parts and empty substrings.
    -set must be a string obtained from set()
    -DEPRECATED: use CharSet().split() instead.
    -
    setsplitx(...)
    setsplitx(text,set,start=0,stop=len(text))

    -Split text[start:stop] into substrings using set, so
    -that every second entry consists only of characters in set.
    -set must be a string obtained with set()
    -DEPRECATED: use CharSet().splitx() instead.
    -
    setstrip(...)
    setstrip(text,set,start=0,stop=len(text),mode=0)

    -Strip all characters in text[start:stop] appearing in set.
    -mode indicates where to strip (<0: left; =0: left and right;
    ->0: right). set must be a string obtained with set()
    -DEPRECATED: use CharSet().strip() instead.
    -
    splitat(...)
    splitat(text,char,nth=1,start=0,stop=len(text))

    -Split text[start:stop] into two substrings at the nth
    -occurrence of char and return the result as 2-tuple. If the
    -character is not found, the second string is empty. nth may
    -be negative: the search is then done from the right and the
    -first string is empty in case the character is not found.
    -
    str2hex(...)
    str2hex(text)

    -Return text converted to a string consisting of two byte
    -HEX values.
    -
    suffix(...)
    suffix(text,suffixes,start=0,stop=len(text)[,translate])

    -Looks at text[start:stop] and returns the first matching
    -suffix out of the tuple of strings given in suffixes.
    -If no suffix is found to be matching, None is returned.
    -The optional 256 char translate string is used to translate
    -the text prior to comparing it with the given suffixes.
    -
    tag(...)
    tag(text,tagtable,sliceleft=0,sliceright=len(text),taglist=[],context=None) 
    -Produce a tag list for a string, given a tag-table
    -- returns a tuple (success, taglist, nextindex)
    -- if taglist == None, then no taglist is created
    -
    upper(...)
    upper(text)

    -Return text converted to upper case.
    -
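    A small usage sketch of the joinlist()/join() replacement idiom and of
    splitat(), as documented above; the expected values in the comments are
    assumptions based on these descriptions, not captured output:

        from simpleparse.stt.TextTools.TextTools import join, joinlist, splitat

        text = "Hello World"
        # joinlist() turns (replacement, l, r) tuples into a taglist; join()
        # then splices the replacements in between copies of the kept slices.
        taglist = joinlist(text, [("Python", 6, 11)])
        replaced = join(taglist)   # expected: 'Hello Python'

        # splitat() splits at the nth occurrence of the given character.
        head, tail = splitat("key=value=more", "=", 1)
        # expected: head == 'key', tail == 'value=more'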

    -Data
           A2Z = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    -A2Z_charset = <Character Set object for 'A-Z'>
    -A2Z_set = '\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -AllIn = 11
    -AllInCharSet = 41
    -AllInSet = 31
    -AllNotIn = 12
    -AppendMatch = 2048
    -AppendTagobj = 1024
    -AppendToTagobj = 512
    -BOYERMOORE = 0
    -Break = 0
    -Call = 201
    -CallArg = 202
    -CallTag = 256
    -EOF = 101
    -FASTSEARCH = 1
    -Fail = 100
    -GENERATOR = <simpleparse.generator.Generator instance>
    -Here = 1
    -Is = 13
    -IsIn = 14
    -IsInCharSet = 42
    -IsInSet = 32
    -IsNot = 15
    -IsNotIn = 15
    -Jump = 100
    -JumpTarget = 104
    -LookAhead = 4096
    -Loop = 205
    -LoopControl = 206
    -MatchFail = -1000000
    -MatchOk = 1000000
    -Move = 103
    -NoWord = 211
    -Reset = -1
    -Skip = 102
    -SubTable = 207
    -SubTableInList = 208
    -TRIVIAL = 2
    -Table = 203
    -TableInList = 204
    -ThisTable = 999
    -To = 0
    -ToBOF = 0
    -ToEOF = -1
    -Umlaute = '\xc4\xd6\xdc'
    -Umlaute_charset = <Character Set object for '\xc4\xd6\xdc'>
    -Word = 21
    -WordEnd = 23
    -WordStart = 22
    -a2z = 'abcdefghijklmnopqrstuvwxyz'
    -a2z_charset = <Character Set object for 'a-z'>
    -a2z_set = '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -alpha = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
    -alpha_charset = <Character Set object for 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'>
    -alpha_set = '\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -alphanumeric = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'
    -alphanumeric_charset = <Character Set object for 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'>
    -alphanumeric_set = '\x00\x00\x00\x00\x00\x00\xff\x03\xfe\xff\xff\x07\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -any = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./...\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'
    -any_charset = <Character Set object for '\x00-\xff'>
    -any_set = '\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff'
    -formfeed = '\x0c'
    -formfeed_charset = <Character Set object for '\x0c'>
    -german_alpha = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\xe4\xf6\xfc\xdf\xc4\xd6\xdc'
    -german_alpha_charset = <Character Set object for 'ABCDEFGHIJKLMNOPQRSTU...hijklmnopqrstuvwxyz\xe4\xf6\xfc\xdf\xc4\xd6\xdc'>
    -german_alpha_set = '\x00\x00\x00\x00\x00\x00\x00\x00\xfe\xff\xff\x07\xfe\xff\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00@\x90\x10\x00@\x10'
    -id2cmd = {-1000000: 'MatchFail', -1: 'ToEOF', 0: 'Fail/Jump', 1: 'Here', 11: 'AllIn', 12: 'AllNotIn', 13: 'Is', 14: 'IsIn', 15: 'IsNotIn', 21: 'Word', ...}
    -newline = '\r\n'
    -newline_charset = <Character Set object for '\r\n'>
    -newline_set = '\x00$\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -nonwhitespace_charset = <Character Set object for '^ \t\x0b\r\n\x0c'>
    -nonwhitespace_set = '\xff\xc1\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff'
    -number = '0123456789'
    -number_charset = <Character Set object for '0-9'>
    -number_set = '\x00\x00\x00\x00\x00\x00\xff\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -sFindWord = 213
    -sWordEnd = 212
    -sWordStart = 211
    -tagtable_cache = {(46912536021760, 0): <String Tag Table object>, (46912540134840, 0): <String Tag Table object>, (46912541410080, 0): <String Tag Table object>, (46912541454848, 0): <String Tag Table object>, (46912541455136, 0): <String Tag Table object>, (46912541455208, 0): <String Tag Table object>, (46912541489264, 0): <String Tag Table object>, (46912541566016, 0): <String Tag Table object>, (46912543903688, 0): <String Tag Table object>, (46912543908136, 0): <String Tag Table object>, ...}
    -to_lower = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./...\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'
    -to_upper = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./...\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'
    -umlaute = '\xe4\xf6\xfc\xdf'
    -umlaute_charset = <Character Set object for '\xe4\xf6\xfc\xdf'>
    -white = ' \t\x0b'
    -white_charset = <Character Set object for ' \t\x0b'>
    -white_set = '\x00\x02\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -whitespace = ' \t\x0b\r\n\x0c'
    -whitespace_charset = <Character Set object for ' \t\x0b\r\n\x0c'>
    -whitespace_set = '\x00&\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    -
    \ No newline at end of file
    diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.tests.test_printers.html simpleparse-2.2.0/doc/pydoc/simpleparse.tests.test_printers.html
    --- simpleparse-2.1.0a1/doc/pydoc/simpleparse.tests.test_printers.html  2006-02-19 01:05:17.000000000 +0000
    +++ simpleparse-2.2.0/doc/pydoc/simpleparse.tests.test_printers.html  1970-01-01 00:00:00.000000000 +0000
    @@ -1,382 +0,0 @@
    -Python: module simpleparse.tests.test_printers
     
    - 
    -simpleparse.tests.test_printers
    -/home/mcfletch/pylive/simpleparse/tests/test_printers.py

    Test the print-to-python-file module

    -This just uses the simpleparsegrammar declaration, which is
    -parsed, then linearised, then loaded as a Python module.

    -

    -Modules
           
    os
    -
    simpleparse.tests.test_grammarparser
    -
    unittest
    -

    -Classes
           
    -SimpleParseGrammarTests(TestCase)
    -    PrintersTests

    -class PrintersTests(SimpleParseGrammarTests)
        
    Method resolution order:
    -PrintersTests
    -SimpleParseGrammarTests
    -TestCase
    -object
    -Methods defined here:
    -
    doBasicTest(self, parserName, testValue, expected)
    - -
    setUp(self)
    - -
    tearDown(self)
    - -
    -Methods inherited from SimpleParseGrammarTests:
    -
    testChar1(self)
    - -
    testChar2(self)
    - -
    testChar3(self)
    - -
    testChar4(self)
    - -
    testChar5(self)
    - -
    testChar6(self)
    - -
    testChar7(self)
    - -
    testChar8(self)
    - -
    testChar85(self)
    - -
    testChar9(self)
    - -
    testCharNoBrace1(self)
    - -
    testCharRange1(self)
    - -
    testComment1(self)
    - -
    testComment2(self)
    - -
    testCutToken2(self)
    - -
    testCutToken3(self)
    - -
    testCutToken4(self)
    - -
    testDeclaration(self)
    - -
    testDeclaration2(self)
    - -
    testDeclaration3(self)
    - -
    testDeclaration4(self)
    - -
    testDeclaration5(self)
    - -
    testDeclarationSet1(self)
    - -
    testDeclarationSet2(self)
    Just tries to parse and sees that everything was parsed, doesn't predict the result
    - -
    testElementToken1(self)
    - -
    testElementToken2(self)
    - -
    testElementToken3(self)
    - -
    testElementToken4(self)
    - -
    testElementToken5(self)
    - -
    testElementToken6(self)
    Lookahead indicator with positive
    - -
    testElementToken7(self)
    Lookahead indicator with negative
    - -
    testElementToken8(self)
    Error on fail indicator
    - -
    testElementToken9(self)
    Error on fail indicator with message
    - -
    testErrorOnFailFlag1(self)
    - -
    testExpandedName1(self)
    - -
    testExpandedName2(self)
    - -
    testFOGroup1(self)
    - -
    testGroup1(self)
    - -
    testGroup2(self)
    - -
    testGroup3(self)
    Test group with sequential added group
    -Note that this test also serves to test
    -the function of non-reporting names
    - -
    testGroup4(self)
    Test group with sequential added group
    -Note that this test also serves to test
    -the function of non-reporting names
    - -
    testGroup5(self)
    Test group with sequential added group
    -Note that this test also serves to test
    -the function of non-reporting names
    - -
    testGroup6(self)
    Test group with multiple / 'd values
    - -
    testLiteral1(self)
    - -
    testLiteral2(self)
    - -
    testLiteral3(self)
    - -
    testLiteral4(self)
    - -
    testLiteral5(self)
    - -
    testLiteral6(self)
    - -
    testLiteralDecorator(self)
    - -
    testLiteralDecorator2(self)
    - -
    testLiteralDecorator3(self)
    Decorator must be right next to literal, no whitespace
    - -
    testLookAheadIndicator1(self)
    - -
    testLookAheadIndicator2(self)
    - -
    testName1(self)
    - -
    testName2(self)
    - -
    testName3(self)
    - -
    testNegposIndicator1(self)
    - -
    testNegposIndicator2(self)
    - -
    testOccurenceIndicator1(self)
    - -
    testOccurenceIndicator2(self)
    - -
    testOccurenceIndicator3(self)
    - -
    testOccurenceIndicator4(self)
    - -
    testOccurenceIndicator5(self)
    - -
    testRange1(self)
    - -
    testRange2(self)
    - -
    testRange3(self)
    - -
    testRange4(self)
    Test optional repeating children running into eof

    -Original SimpleParse had a major failure here,
    -system hung trying to parse the [] string.  Basically,
    -there was no check for EOF during a repeating-item
    -parse (save for literals and character sets), so you
    -wound up with infinite loops.
    - -
    testRange5(self)
    Test optional repeating children with no termination

    -Original SimpleParse had a major failure here,
    -system hung trying to parse the [] string.  Basically,
    -there was no check for EOF during a repeating-item
    -parse (save for literals and character sets), so you
    -wound up with infinite loops.
    - -
    testSEQGroup1(self)
    - -
    testSEQToken1(self)
    - -
    testSeqGroup2(self)
    - -
    testUnreportedName1(self)
    - -
    testUnreportedName2(self)
    - -
    testWhitespace1(self)
    - -
    testWhitespace2(self)
    - -
    testWhitespace3(self)
    - -
    testWhitespace4(self)
    - -
    testWhitespace5(self)
    Bug in 2.0.0 where Null comments such as:
    -                "#
    -"

    -                didn't parse.
    - -
    -Methods inherited from TestCase:
    -
    __call__(self, *args, **kwds)
    - -
    __init__(self, methodName='runTest')
    Create an instance of the class that will use the named test
    -method when executed. Raises a ValueError if the instance does
    -not have a method with the specified name.
    - -
    __repr__(self)
    - -
    __str__(self)
    - -
    assertAlmostEqual = failUnlessAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are unequal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most signficant digit).
    - -
    assertAlmostEquals = failUnlessAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are unequal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most signficant digit).
    - -
    assertEqual = failUnlessEqual(self, first, second, msg=None)
    Fail if the two objects are unequal as determined by the '=='
    -operator.
    - -
    assertEquals = failUnlessEqual(self, first, second, msg=None)
    Fail if the two objects are unequal as determined by the '=='
    -operator.
    - -
    assertFalse = failIf(self, expr, msg=None)
    Fail the test if the expression is true.
    - -
    assertNotAlmostEqual = failIfAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are equal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most signficant digit).
    - -
    assertNotAlmostEquals = failIfAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are equal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most signficant digit).
    - -
    assertNotEqual = failIfEqual(self, first, second, msg=None)
    Fail if the two objects are equal as determined by the '=='
    -operator.
    - -
    assertNotEquals = failIfEqual(self, first, second, msg=None)
    Fail if the two objects are equal as determined by the '=='
    -operator.
    - -
    assertRaises = failUnlessRaises(self, excClass, callableObj, *args, **kwargs)
    Fail unless an exception of class excClass is thrown
    -by callableObj when invoked with arguments args and keyword
    -arguments kwargs. If a different type of exception is
    -thrown, it will not be caught, and the test case will be
    -deemed to have suffered an error, exactly as for an
    -unexpected exception.
    - -
    assertTrue = failUnless(self, expr, msg=None)
    Fail the test unless the expression is true.
    - -
    assert_ = failUnless(self, expr, msg=None)
    Fail the test unless the expression is true.
    - -
    countTestCases(self)
    - -
    debug(self)
    Run the test without collecting errors in a TestResult
    - -
    defaultTestResult(self)
    - -
    fail(self, msg=None)
    Fail immediately, with the given message.
    - -
    failIf(self, expr, msg=None)
    Fail the test if the expression is true.
    - -
    failIfAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are equal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most signficant digit).
    - -
    failIfEqual(self, first, second, msg=None)
    Fail if the two objects are equal as determined by the '=='
    -operator.
    - -
    failUnless(self, expr, msg=None)
    Fail the test unless the expression is true.
    - -
    failUnlessAlmostEqual(self, first, second, places=7, msg=None)
    Fail if the two objects are unequal as determined by their
    -difference rounded to the given number of decimal places
    -(default 7) and comparing to zero.

    -Note that decimal places (from zero) are usually not the same
    -as significant digits (measured from the most signficant digit).
    - -
    failUnlessEqual(self, first, second, msg=None)
    Fail if the two objects are unequal as determined by the '=='
    -operator.
    - -
    failUnlessRaises(self, excClass, callableObj, *args, **kwargs)
    Fail unless an exception of class excClass is thrown
    -by callableObj when invoked with arguments args and keyword
    -arguments kwargs. If a different type of exception is
    -thrown, it will not be caught, and the test case will be
    -deemed to have suffered an error, exactly as for an
    -unexpected exception.
    - -
    id(self)
    - -
    run(self, result=None)
    - -
    shortDescription(self)
    Returns a one-line description of the test, or None if no
    -description has been provided.

    -The default implementation of this method returns the first line of
    -the specified test method's docstring.
    - -
    -Data and other attributes inherited from TestCase:
    -
    __dict__ = <dictproxy object>
    dictionary for instance variables (if defined)
    - -
    __weakref__ = <attribute '__weakref__' of 'TestCase' objects>
    list of weak references to the object (if defined)
    - -
    failureException = <class exceptions.AssertionError>
    Assertion failed.
    - -

    - - - - - -
     
    -Functions
           
    getSuite()
    -

    - - - - - -
     
    -Data
           testModuleFile = 'test_printers_garbage.py'
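For orientation (not part of the deleted page): the listing above exposes a getSuite() helper, so the module can be driven with the stock unittest machinery. A minimal, hedged sketch, assuming getSuite() returns a standard unittest.TestSuite as the listing indicates:

    import unittest
    from simpleparse.tests import test_printers

    if __name__ == "__main__":
        # getSuite() is the helper listed under "Functions" above.
        unittest.TextTestRunner(verbosity=2).run(test_printers.getSuite())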
-
\ No newline at end of file
diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.tests.test_simpleparsegrammar.html simpleparse-2.2.0/doc/pydoc/simpleparse.tests.test_simpleparsegrammar.html
--- simpleparse-2.1.0a1/doc/pydoc/simpleparse.tests.test_simpleparsegrammar.html	2006-02-19 01:05:17.000000000 +0000
+++ simpleparse-2.2.0/doc/pydoc/simpleparse.tests.test_simpleparsegrammar.html	1970-01-01 00:00:00.000000000 +0000
@@ -1,611 +0,0 @@
[Deleted: the generated pydoc page "Python: module simpleparse.tests.test_simpleparsegrammar" (index: /home/mcfletch/pylive/simpleparse/tests/test_simpleparsegrammar.py). Modules: simpleparse.stt.TextTools.TextTools, pprint, simpleparse.tests.test_erroronfail, simpleparse.tests.test_grammarparser, unittest. Classes: AppendToTagobjMethodSource and BasicMethodSource (each defining only __init__); CallTests(TestCase), "Tests semantics of calling objects from a method source during parsing", with parse(self, definition, parserName, testValue, source), test_AppendMatch ("Test ability to append the text-string match to the results list"), test_AppendTagObj ("Test appending the tagobject to the results list"), test_AppendToTagObj and test_basic_call ("Test basic ability to call a method instead of regular functioning"); NameTests(TestCase), with doBasicTest, test_expanded_SingleNameChild ("Expanded group with single child which is a Name itself", which originally failed when the Name object's report value was changed to 0), test_expanded_name ("Non-reporting (expanded) name test", a name whose children are reported but which is not itself reported, letting you create anonymous groups referenced from other productions), and the test_n/test_no/test_nor/test_nr (plus _f variants) and test_p/test_po/test_por/test_pr combinations; ParserGenerationTests(TestCase), with doBasicTest, testGenCILiteral1-6, testGenNegLit1, testGenNegRange1-2, testGenPos1-2, testGenPosRept1, testGenPosReptOpt1-2, testLookahead2-6, testLookaheadNeg and testLookaheadNeg2-6, testLookaheadPositive, testMultiLineDef ("Test multi-line definitions") and testOptionalGroupHitEOF ("Test optional group hitting an EOF during success run"). Functions: getSuite(). Data: AnyInt = <Any Integer>, NullResult = <Null Children>. The repeated unittest.TestCase member documentation is omitted from this summary.]
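For orientation (a hedged sketch that is not taken from the test module itself): the unreported-name feature exercised by NameTests drops a production from the result tree while still consuming its text:

    from simpleparse.parser import Parser

    decl = r'''
    pair  := word, ws, word
    word  := [a-zA-Z]+
    <ws>  := [ \t]+         # unreported: no tuple for ws appears in the results
    '''
    parser = Parser(decl, 'pair')
    success, children, next_pos = parser.parse('hello world')
    # children holds only the two ('word', start, stop, ...) tuples;
    # the whitespace matched by <ws> is consumed but not reported.
    print(children)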
-
\ No newline at end of file
diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.tests.test_xml.html simpleparse-2.2.0/doc/pydoc/simpleparse.tests.test_xml.html
--- simpleparse-2.1.0a1/doc/pydoc/simpleparse.tests.test_xml.html	2006-02-19 01:05:17.000000000 +0000
+++ simpleparse-2.2.0/doc/pydoc/simpleparse.tests.test_xml.html	1970-01-01 00:00:00.000000000 +0000
@@ -1,247 +0,0 @@
[Deleted: the generated pydoc page "Python: module simpleparse.tests.test_xml" (index: /home/mcfletch/pylive/simpleparse/tests/test_xml.py). Modules: string, unittest, simpleparse.xml.xml_parser. Classes: ProductionTest, with __init__(self, production, should, shouldnot) and __call__(self) ("Perform the test"); XMLProductionTests(TestCase), "Tests that XML grammar productions match appropriate values", whose class attributes are ProductionTest instances for AttDef, AttValue, AttlistDecl, Attribute, CharData, Comment, EntityDecl, EntityDef, ExternalID, Name, PubidLiteral, content, contentspec, element, elementdecl, elementdecl_pe and prolog. Functions: getSuite(). Data: p = <simpleparse.parser.Parser instance>; production = 'prolog'; should = a list of sample XML prologs (e.g. '<?xml version="1.0"?> <!DOCTYPE greeting SYSTEM "hello.dtd">', '<?xml version="1.0" standalone=\'yes\'?>', ...); shouldnot = []; testData = a mapping from production name to (should, shouldnot) sample lists, e.g. 'Comment': (['<!-- testing -->', '<!---->', '<!--- -->', ...], ['<!-- -- -->', '<!-->', ...]). The repeated unittest.TestCase member documentation is omitted from this summary.]
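The page above describes a table-driven pattern: ProductionTest(production, should, shouldnot) instances stored as class attributes. A hedged sketch of the idea (the helper below is an illustration, not the module's actual code; the sample strings are taken from the testData listing above):

    from simpleparse.parser import Parser
    from simpleparse.xml import xml_parser

    parser = Parser(xml_parser.declaration, 'document')

    def check_production(production, should, shouldnot):
        """Assert each 'should' sample matches completely and no 'shouldnot' sample does."""
        for sample in should:
            success, children, next_pos = parser.parse(sample, production)
            assert success and next_pos == len(sample), (production, sample)
        for sample in shouldnot:
            success, children, next_pos = parser.parse(sample, production)
            assert not (success and next_pos == len(sample)), (production, sample)

    check_production('Comment', ['<!-- testing -->', '<!---->'], ['<!-->'])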
-
\ No newline at end of file
diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.xml.html simpleparse-2.2.0/doc/pydoc/simpleparse.xml.html
--- simpleparse-2.1.0a1/doc/pydoc/simpleparse.xml.html	2006-02-19 01:05:12.000000000 +0000
+++ simpleparse-2.2.0/doc/pydoc/simpleparse.xml.html	1970-01-01 00:00:00.000000000 +0000
@@ -1,37 +0,0 @@
[Deleted: the generated pydoc page "Python: package simpleparse.xml" (index: /home/mcfletch/pylive/simpleparse/xml/__init__.py). Package docstring: "XML Parsing package. At the moment it's really limited, but it does the basics, and the rest is mostly just a matter of fiddling about with Unicode and CharacterType support. There is only very minimal support for Reference types, basically we note that a Reference exists, but don't do any further processing of it." Package Contents: __init__ (XML Parsing package), xml_parser (XML Parser based (loosely) on the XML Spec's EBNF). Data: __path__ = ['/home/mcfletch/pylive/simpleparse/xml'].]
-
\ No newline at end of file
diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.xml.__init__.html simpleparse-2.2.0/doc/pydoc/simpleparse.xml.__init__.html
--- simpleparse-2.1.0a1/doc/pydoc/simpleparse.xml.__init__.html	2006-02-19 01:05:17.000000000 +0000
+++ simpleparse-2.2.0/doc/pydoc/simpleparse.xml.__init__.html	1970-01-01 00:00:00.000000000 +0000
@@ -1,23 +0,0 @@
[Deleted: the generated pydoc page "Python: module simpleparse.xml.__init__" (index: /home/mcfletch/pylive/simpleparse/xml/__init__.py). It carries the same "XML Parsing package" docstring quoted for the package page above and nothing else.]
-
\ No newline at end of file
diff -Nru simpleparse-2.1.0a1/doc/pydoc/simpleparse.xml.xml_parser.html simpleparse-2.2.0/doc/pydoc/simpleparse.xml.xml_parser.html
--- simpleparse-2.1.0a1/doc/pydoc/simpleparse.xml.xml_parser.html	2006-02-19 01:05:17.000000000 +0000
+++ simpleparse-2.2.0/doc/pydoc/simpleparse.xml.xml_parser.html	1970-01-01 00:00:00.000000000 +0000
@@ -1,58 +0,0 @@
[Deleted: the generated pydoc page "Python: module simpleparse.xml.xml_parser" (index: /home/mcfletch/pylive/simpleparse/xml/xml_parser.py). The module docstring survives in the removed markup and is reproduced below.]
    XML Parser based (loosely) on the XML Spec's EBNF

    -This is a hand-coded parser based on the W3C's XML specification,
    -there was a lot of busy-work rewriting to make the syntax agree,
    -but also a number of signficant structural changes required by
    -the limitations of the SimpleParse engine, and the completely
    -procedural definition of References in the XML spec (the References
    -don't occur in most places they can occur, and they are seen as
    -altering the buffer directly as soon as they are encountered, this
    -isn't something that fits readily into the mx.TextTools engine.

    -http://www.w3.org/TR/REC-xml#sec-references

    -Major Deviations from Spec:
    -        No support for the unicode-style character classes
    -        No support for UTF-16 (or Unicode at all, for that matter)
    -        No support for References that alter the production
    -                being parsed, so you can't have a Reference to an
    -                item "</this>and<this>" or similar non-structure-
    -                respecting References.  References have
    -                particular locations they can occur, and they are
    -                just ignored elsewhere
    -        No support for parsing the contents of References within
    -                the primary parsing pass
    -        No support for excluded start/end tags
    -        Comments allowed in both tags and declarations (but not
    -                inside content-specifiers).
    -        Allows end tags of the form </>

[Modules: simpleparse.common.chartypes, simpleparse.common.numbers, simpleparse.common.strings.
Data: declaration = "\n\n# Simple (changable) literals\n# These should b...>'\n\n\ndocument := prolog, element, Misc*\n"]
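The declaration string listed above can be handed straight to simpleparse.parser.Parser; a minimal, hedged usage sketch ('document' and 'prolog' are productions of the declaration, and the sample is one of the "should" strings from the test data earlier in this diff):

    from simpleparse.parser import Parser
    from simpleparse.xml import xml_parser

    parser = Parser(xml_parser.declaration, 'document')
    sample = """<?xml version="1.0" standalone='yes'?>"""
    success, children, next_pos = parser.parse(sample, 'prolog')
    print('prolog matched %d of %d characters' % (next_pos, len(sample)))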
-
\ No newline at end of file
diff -Nru simpleparse-2.1.0a1/doc/simpleparse_grammars.html simpleparse-2.2.0/doc/simpleparse_grammars.html
--- simpleparse-2.1.0a1/doc/simpleparse_grammars.html	2004-01-04 22:14:41.000000000 +0000
+++ simpleparse-2.2.0/doc/simpleparse_grammars.html	2015-11-11 18:42:23.000000000 +0000
@@ -1,18 +1,16 @@
[Markup-only hunk: the HTML head of the page is rewritten (doctype, meta and stylesheet markup); the visible heading "SimpleParse Grammars" and the opening sentence "SimpleParse uses a particular EBNF grammar which reflects the current ..." are unchanged context.]
@@ -46,7 +44,7 @@
[Markup-only hunk: a table tag is updated. Unchanged context: "... simpleparse.objectgenerator, their syntax is defined in the module simpleparse.simpleparsegrammar. You can read a formal definition of the grammar used to define them at the end of this document."]
@@ -170,13 +168,23 @@
    Character Classes, Strings and Escape Characters

    Both character classes and strings in simpleparse may use octal -escaping (of 1 to 3 octal digits), hexadecimal escaping (2 digits) or -standard Python character escapes ( \a\b\f\n\r\t\v (Note that “u” and -“U” are missing and that “\n” is interpreted according to the local +escaping (of 1 to 3 octal digits), hexadecimal escaping (2 digits), or Unicode escaping (4 or 8 digits) or +standard Python character escapes (\a\b\f\n\r\t\v)

    +
      +
    • “\n” is interpreted according to the local machine, and thus may be non-portable, so an explicit declaration using hexadecimal code might be more suitable in cases where differentiation -is required)). Strings may be either single or double quoted (but not +is required).
    • +
    • the number of digits is fixed for Hexadecimal and Unicode escaping
    • +
    • you MUST use \u or \U escape codes to define Unicode characters, +using actual Unicode characters will not work (it will likely cause +seg-faults during parser generation) (Unicode support should be +considered experimental as well)
      +
    • +
    +

    Strings may be either single or double quoted (but not triple quoted).

    +

To include a "]" character in a character class, make it the first
character of the class. Similarly, a literal "-" character must be
either the first (after the optional "]" character) or the last
@@ -192,7 +200,7 @@

    Each element token can have a prefix and/or a postfix modifier applied to it to alter how the engine treats a match of the “base” element token.
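For illustration (a hedged sketch; the declaration below is invented for this document and is not part of the distribution), combining the escape forms described above with the postfix indicators and the "!" error-on-fail flag defined in the formal grammar at the end of this page:

    from simpleparse.parser import Parser

    decl = r'''
    line    := field, (tab, field)*, newline!   # "*" zero-or-more; "!" error-on-fail
    field   := -[\x09\n]+                       # "-" negates the class; \x09 is a hex escape
    tab     := [\011]                           # \011 is an octal escape for TAB
    newline := '\n'
    '''
    parser = Parser(decl, 'line')
    success, children, next_pos = parser.parse('a\tb\tc\n')
    print('matched %d characters' % next_pos)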

[Markup-only hunk: a table tag is updated; the element-token text above is unchanged context.]
@@ -367,7 +375,7 @@
    Default Result Tree Generation

    Here are some examples showing sample productions and the result trees they would generate.

    - +
    @@ -404,10 +412,9 @@ without generating extraneous result-tuples in the results tree. Both of these methods still produce standard results trees so no special work is -required to process the results tree. (There are methods described in Processing Results Trees which +required to process the results tree. (There are methods described in Processing Results Trees which can generate non-standard result trees for special purposes).

    -
    s := "this"
    +
    @@ -495,18 +502,7 @@ version found in the simpleparse.simpleparsegrammar module.

    declaration = r"""declarationset      :=  declaration+
    declaration := ts, (unreportedname/expandedname/name) ,ts,':',':'?,'=',seq_group

    element_token := lookahead_indicator?, ts, negpos_indicator?,ts, (literal/range/group/name),ts, occurence_indicator?, ts, error_on_fail?

    negpos_indicator := [-+]
    lookahead_indicator := "?"
    occurence_indicator := [+*?]
    error_on_fail := "!", (ts,literal)?

    >group< := '(',seq_group, ')'
    seq_group := ts,(error_on_fail/fo_group/element_token),
    (ts, seq_indicator, ts,
    (error_on_fail/fo_group/element_token)
    )*, ts

    fo_group := element_token, (ts, fo_indicator, ts, element_token)+


    # following two are likely something peoples might want to
    # replace in many instances...
    <fo_indicator> := "/"
    <seq_indicator> := ','

    unreportedname := '<', name, '>'
    expandedname := '>', name, '<'
    name := [a-zA-Z_],[a-zA-Z0-9_]*
    <ts> := ( [ \011-\015]+ / comment )*
    comment := '#',-'\n'*,'\n'
    literal := literalDecorator?,("'",(CHARNOSNGLQUOTE/ESCAPEDCHAR)*,"'") / ('"',(CHARNODBLQUOTE/ESCAPEDCHAR)*,'"')
    literalDecorator := [c]



    range := '[',CHARBRACE?,CHARDASH?, (CHARRANGE/CHARNOBRACE)*, CHARDASH?,']'
    CHARBRACE := ']'
    CHARDASH := '-'
    CHARRANGE := CHARNOBRACE, '-', CHARNOBRACE
    CHARNOBRACE := ESCAPEDCHAR/CHAR
    CHAR := -[]]
    ESCAPEDCHAR := '\\',( SPECIALESCAPEDCHAR / ('x',HEXESCAPEDCHAR) / ("u",UNICODEESCAPEDCHAR_16) /("U",UNICODEESCAPEDCHAR_32)/OCTALESCAPEDCHAR )
    SPECIALESCAPEDCHAR := [\\abfnrtv"']
    OCTALESCAPEDCHAR := [0-7],[0-7]?,[0-7]?
    HEXESCAPEDCHAR := [0-9a-fA-F],[0-9a-fA-F]
    CHARNODBLQUOTE := -[\\"]+
    CHARNOSNGLQUOTE := -[\\']+
    UNICODEESCAPEDCHAR_16 := [0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F]
    UNICODEESCAPEDCHAR_32 := [0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F]
    """
    Up to index...
    -

    A SourceForge Logo
    +

    A SourceForge Logo
    Open Source project

    -
    -
    -
    -
    -
    -
    -
    -
-
-
+
\ No newline at end of file
diff -Nru simpleparse-2.1.0a1/error.py simpleparse-2.2.0/error.py
--- simpleparse-2.1.0a1/error.py	2006-02-19 00:39:06.000000000 +0000
+++ simpleparse-2.2.0/error.py	1970-01-01 00:00:00.000000000 +0000
@@ -1,55 +0,0 @@
-"""Definition of the ParserSyntaxError raised on parse failure"""
-import string
-from simpleparse.stt.TextTools.TextTools import countlines
-
-class ParserSyntaxError( SyntaxError ):
-    """Sub-class of SyntaxError for use by SimpleParse parsers
-
-    Every instance will have the following attributes:
-        buffer -- pointer to the source buffer
-        position -- integer position in buffer where error occured or -1
-        production -- the production which failed
-        expected -- string (currently taken from grammar) describing
-            what production/element token failed to match
-    the following will be calculated in order to display
-    human-friendly error messages:
-        line -- ~ text line-number or -1
-        lineChar -- ~ character on line where parsing failed or -1
-
-    """
-    buffer = ""
-    position = -1
-    line = -1
-    production = ""
-    expected = ""
-    DEFAULTTEMPLATE = """Failed parsing production "%(production)s" @pos %(position)s (~line %(line)s:%(lineChar)s).\nExpected syntax: %(expected)s\nGot text: %(text)s"""
-    def __str__( self ):
-        """Create a string representation of the error"""
-        if self.message:
-            return '%s: %s'%( self.__class__.__name__, self.messageFormat(self.message) )
-        else:
-            return '%s: %s'%( self.__class__.__name__, self.messageFormat() )
-    def messageFormat( self, template=None):
-        """Create a default message for this syntax error"""
-        if template is None:
-            template = self.DEFAULTTEMPLATE
-        line, lineChar = self.getLineCoordinate()
-        variables = {
-            "production": self.production,
-            "position": self.position,
-            "line": line,
-            "lineChar": lineChar,
-            "expected": self.expected or "UNKNOWN",
-            "text": repr(self.buffer[ self.position:self.position+50 ]),
-        }
-        return template % variables
-    def getLineCoordinate( self ):
-        """Get (line number, line character) for the error"""
-        lineChar = string.rfind( self.buffer, '\n', 0, self.position)
-        if lineChar == -1: # was no \n before the current position
-            lineChar = self.position
-            line = 1
-        else:
-            line = countlines( self.buffer[:lineChar] )
-            lineChar = self.position-lineChar
-        return line, lineChar
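error.py above defines ParserSyntaxError and the attributes used to report failures. A hedged usage sketch (the grammar below is invented for illustration; ParserSyntaxError, messageFormat() and the "!" error-on-fail flag are taken from the sources shown in this diff):

    from simpleparse.parser import Parser
    from simpleparse.error import ParserSyntaxError

    decl = r'''
    assignment := name, '='!, value    # "!" turns a failed match of '=' into a ParserSyntaxError
    name       := [a-zA-Z_]+
    value      := [0-9]+
    '''
    parser = Parser(decl, 'assignment')
    try:
        parser.parse('width:10')
    except ParserSyntaxError as err:
        # position, production and expected are the attributes documented above;
        # messageFormat() renders the default human-readable description.
        print(err.messageFormat())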
diff -Nru simpleparse-2.1.0a1/examples/bad_declaration.py simpleparse-2.2.0/examples/bad_declaration.py
--- simpleparse-2.1.0a1/examples/bad_declaration.py	2002-08-09 07:23:14.000000000 +0000
+++ simpleparse-2.2.0/examples/bad_declaration.py	1970-01-01 00:00:00.000000000 +0000
@@ -1,28 +0,0 @@
-"""Demonstrates what happens when your declaration is syntactically incorrect
-
-When run as a script, will generate a traceback
-telling you that the grammar defined here is
-incorrectly formatted.
-"""
-from simpleparse.common import numbers, strings, comments
-
-declaration = r'''# note use of raw string when embedding in python code...
-file := [ \t\n]*, section+
-section := '[',identifier,']' ts,'\n', body
-body := statement*
-statement := (ts,semicolon_comment)/equality/nullline
-nullline := ts,'\n'
-comment := -'\n'*
-equality := ts, identifier,ts,'=',ts,identified,ts,'\n'
-identifier := [a-zA-Z], [a-zA-Z0-9_]*
-identified := string/number/identifier
-ts := [ \t]*
-'''
-
-testdata = '''[test1]
-    val=23
-'''
-if __name__ == "__main__":
-    from simpleparse.parser import Parser
-    parser = Parser( declaration, "file" ) # will raise ValueError
-
diff -Nru simpleparse-2.1.0a1/examples/findlineset.py simpleparse-2.2.0/examples/findlineset.py
--- simpleparse-2.1.0a1/examples/findlineset.py	2004-02-01 00:58:48.000000000 +0000
+++ simpleparse-2.2.0/examples/findlineset.py	1970-01-01 00:00:00.000000000 +0000
@@ -1,60 +0,0 @@
-declaration = r"""
-
-firstLine := "This is first line"
-secondLine := "This is second line"
-fifthLine := "This is fifth line"
-
- := [ \t]*
-
-# the actual text strings are included directly
-# for the negative versions which is basically to
-# avoid the overhead of the name-ref indirection
-# (which should be optimised away automatically, but isn't)
-set := -firstLine*, firstLine, -secondLine*, secondLine, -fifthLine*, fifthLine
-sets := set*
-"""
-
-from simpleparse.parser import Parser
-
-p = Parser( declaration, 'set' )
-
-file1 = """This is first line
-This is second line
-This is NOT first line
-This is NOT second line
-This is fifth line
-This is NOT fifth line
-"""
-file2 = """This is first line
-This is fifth line
-This is second line
-This is NOT first line
-This is NOT second line
-This is NOT fifth line
-"""
-if __name__ == "__main__":
-    import pprint
-    import time
-    pprint.pprint(
-        p.parse( file1)
-    )
-    pprint.pprint(
-        p.parse( file2)
-    )
-    testData = "\n"*30000000 + file1
-    print 'starting parse of file 1 with 1 match at end'
-    t = time.clock()
-    success, results, next = p.parse( testData, "sets")
-    print 'finished parse', time.clock()-t
-    print 'number of results', len(results)
-    pprint.pprint(
-        results
-    )
-    print
-    testData = file1 * (30000000/len(file1))
-    print 'starting parse of file 1 with ~230,000 matches (slow)'
-    t = time.clock()
-    success, results, next = p.parse( testData, "sets")
-    print 'finished parse', time.clock()-t
-    print 'number of results', len(results)
-
-Is not triple-quoted string aware.''' - -if __name__ == '__main__': - test() - import sys - if sys.argv[1:]: - import time - filename = sys.argv[1] - file = open( filename ).read() - t = time.time() - val = bigtest( file ) - t = t-time.time() - print '''Parsing Time:''', t - for report, start, stop, children in val[1]: - print string.split(file[ start: stop ], '\n')[0][:75] - else: - print usage diff -Nru simpleparse-2.1.0a1/examples/formatvrml.py simpleparse-2.2.0/examples/formatvrml.py --- simpleparse-2.1.0a1/examples/formatvrml.py 2002-08-09 06:36:18.000000000 +0000 +++ simpleparse-2.2.0/examples/formatvrml.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,195 +0,0 @@ -"""Example using a parser to format VRML97 code as HTML w/CSS -""" -import sys -import os, string -from simpleparse.parser import Parser - -# we use a non-standard VRML parser definition which retains comments and whitespace -VRMLPARSERDEF = r''' -# Specialised VRML parser for colourising VRML files -# Specialisation is minor, mostly changes to what information -# is reported and what information is discarded. - -vrmlFile := '#',header,'\n',ts, vrmlScene, ts -header := -[\n]* -vrmlScene := (Proto/ExternProto/ROUTE/('USE',ts,USE,ts)/Script/Node/SFNull)* -Proto := PROTO,ts,nodegi,ts,'[',ts,(fieldDecl/eventDecl)*,']', ts, '{', ts, vrmlScene,ts, '}', ts -fieldDecl := fieldExposure,ts,dataType,ts,attrName,ts,Field,ts -fieldExposure := 'field'/'exposedField' -dataType := 'SFBool'/'SFString'/'SFFloat'/'SFTime'/'SFVec3f'/'SFVec2f'/'SFRotation'/'SFInt32'/'SFImage'/'SFRotation'/'SFColor'/'SFNode'/'MFBool'/'MFString'/'MFFloat'/'MFTime'/'MFVec3f'/'MFVec2f'/'MFRotation'/'MFInt32'/'MFRotation'/'MFColor'/'MFNode' -eventDecl := eventDirection, ts, dataType, ts, eventName, ts -eventDirection := 'eventIn'/'eventOut' -ExternProto := EXTERNPROTO,ts,nodegi,ts,'[',ts,(extFieldDecl/eventDecl)*,']', ts, ExtProtoURL -extFieldDecl := fieldExposure,ts,dataType,ts,name,ts -ExtProtoURL := '['?,ts,SFString+, ']'?, ts # just an MFString by another name :) -ROUTEData := 'ROUTE',ts, DEFName,'.',DEFName, ts, 'TO', ts, DEFName,'.',DEFName -ROUTE := ROUTEData, ts -Node := (DEF,ts,DEFName,ts)?,nodegi,ts,'{',ts,(Proto/ExternProto/ROUTE/Attr)*,ts,'}', ts -Script := (DEF,ts,DEFName,ts)?,scriptgi,ts,'{',ts,(ScriptFieldDecl/ScriptEventDecl/Proto/ExternProto/ROUTE/Attr)*,ts,'}', ts -ScriptEventDecl := eventDirection, ts, dataType, ts, attrName, ts, ('IS', ts, IS,ts)? -ScriptFieldDecl := fieldExposure,ts,dataType,ts,attrName,ts,(('IS', ts,IS,ts)/Field),ts -SFNull := 'NULL', ts - -EXTERNPROTO := 'EXTERNPROTO' -PROTO := 'PROTO' -scriptgi := 'Script' -DEF := 'DEF' -eventName := name -DEFName := name -USE := name -IS := name -nodegi := name -attrName := name -Attr := attrName, ts, (('IS', ts,IS,ts)/Field), ts -Field := ( '[',ts,((SFNumber/SFBool/SFString/('USE',ts,USE,ts)/Script/Node),ts)*, ']', ts )/((SFNumber/SFBool/SFNull/SFString/('USE',ts,USE,ts)/Script/Node),ts)+ - - := -[][0-9{}\000-\020"'#,.\\ ], -[][{}\000-\020"'#,.\\ ]* - := [-+0-9.]+,([eE],[-+0-9.]+)? 
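The skip-ahead idiom that findlineset.py and findliterals.py above rely on (a negative, repeating literal that consumes everything up to the next occurrence of that literal) can be isolated into a tiny grammar; the production name and sample text are illustrative:

from simpleparse.parser import Parser

decl = r'''
upToMarker := -'END'*, 'END'
'''
parser = Parser(decl, 'upToMarker')
print(parser.parse('anything at all END trailing text'))   # the next-position points just past 'END'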
- := 'TRUE'/'FALSE' -SFString := '"',(CHARNODBLQUOTE/ESCAPEDCHAR)*,'"' - := -[\134"]+ - := '\\"'/'\134\134' -comment := '#',-'\012'*,'\n' -ts := ( [ \011-\015,]+ / comment+ )* -''' -vrmlparser = Parser( VRMLPARSERDEF, 'vrmlFile' ) - -class VRMLFormatter: - ''' - Base formatting class - ''' - def __init__(self, infile, vrmlparser = vrmlparser ): - self.infile = open( infile ).read() - self.tree = vrmlparser.parse( self.infile )[1] # the list of children - # construct a dummy "vrmlFile" node, should get that fixed in TextTools some day - self.tree = ('vrmlFile', 0, len(self.infile), self.tree ) - - def _format( self, tup, outfile, infile ): - ''' - Step through the children, our result is - thisnode's head, data_to_first_child, firstchild, data_to_second_child, secondchild,...,data_from_last_child, thisnode's tail - ''' - nodetype = tup[0] - # write preceding formatting - hdata = self._headdata( nodetype, 1 ) - if hdata is not None: - outfile.write( hdata ) - startPos = tup[1] - children = tup[3][:] - while children: - outfile.write( self._escapeData( infile[ startPos: children[0][1] ] ) ) - self._format( children[0], outfile, infile ) - startPos = children[0][2] - del children [0] - # now write this node's data from startPos to endPos - outfile.write( self._escapeData( infile[startPos: tup[2] ]) ) - # write trailing formatting - hdata = self._headdata( nodetype, 0 ) - if hdata is not None: - outfile.write( hdata ) - - def _headdata( self, nodetype, head=1 ): - ''' - Return head or tail data for this nodetype if available, None otherwise - ''' - if head: - head = '_head' - else: - head = '_tail' - if hasattr( self, nodetype+head ): - return getattr( self, nodetype+head) % locals() - def _escapeData( self, data ): - return data - - def format( self, outfile ): - outfile = open( outfile, 'w' ) - self._format( self.tree, outfile, self.infile ) - outfile.close() - -class HTMLVRMLFormatter( VRMLFormatter ): - ''' - Format VRML files for display in HTML - ''' - def _escapeData( self, data ): - return string.join( string.split( - string.join( string.split( - string.join( string.split( - string.join( string.split( data, '&' ), '&' ), - '<'), '<'), - '>'), '>'), - '\t'), ' ') - - - NODEMAP = { - 'vrmlFile': '''
    ''',
    -		'vrmlFile_tail':'''\n
    ''', - 'header':'<%(head)sfont color="purple">', - 'header_tail':'<%(head)sfont>', - 'comment':'', - 'comment_tail':'', - 'PROTO':'', - 'PROTO_tail':'', - 'EXTERNPROTO':'', - 'EXTERNPROTO_tail':'', - 'SFString':'', - 'SFString_tail':'', - - - 'DEF':'<%(head)sstrong>', -# 'name':'<%(head)sfont color="green">', -# 'name_tail':'<%(head)sfont>', - 'DEFName':'', - 'DEFName_tail':'', - 'nodegi':'', - 'nodegi_tail':'', - 'scriptgi':'', - 'scriptgi_tail':'', - 'ROUTEData':'', - 'ROUTEData_tail':'', - 'attrName':'', - 'attrName_tail':'', - 'fieldExposure':'', - 'fieldExposure_tail':'', - 'dataType':'', - 'dataType_tail':'', - 'eventDirection':'', - 'eventDirection_tail':'', - - - } - def _headdata( self, nodetype, head=1): - if head: - head = '' - return self.NODEMAP.get( nodetype, '' )%locals() - else: - head = '/' - val = self.NODEMAP.get( nodetype+'_tail', '' )%locals() - if not val: - return self.NODEMAP.get( nodetype, '' )%locals() - else: - return val - -usage = '''formatvrml.py infile outfile - infile -- properly formatted VRML 97 file - outfile -- destination for output HTML (will overwrite if present) - -Description: - Formatvrml is a simple script for syntax-coloring VRML 97 code for - presentation on web sites and/or in documentation. To use it, just - run the script with your source and destination files. Copy the - HTML and the css file to your web server. - - The syntax coloring is all done with a Cascading Style Sheet link - at the top of the file (to a file named vrmlCode.css in the same - directory as the HTML file). You can change the formatting of your - VRML by changing this file's definitions. -''' - -if __name__ == '__main__': - import sys - if len( sys.argv) != 3: - print usage - raw_input('Press to exit:') - else: - file = HTMLVRMLFormatter( sys.argv[1] ) - file.format( sys.argv[2] ) diff -Nru simpleparse-2.1.0a1/examples/__init__.py simpleparse-2.2.0/examples/__init__.py --- simpleparse-2.1.0a1/examples/__init__.py 2002-08-09 07:23:14.000000000 +0000 +++ simpleparse-2.2.0/examples/__init__.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,7 +0,0 @@ -'''Examples of use for the SimpleParse parser generator - -Included are VRML97, EBNF and LISP parsers, as -well as a demonstration of using "pre-built" -parser nodes (particularly one based on the re -module). -''' \ No newline at end of file diff -Nru simpleparse-2.1.0a1/examples/lisp.py simpleparse-2.2.0/examples/lisp.py --- simpleparse-2.1.0a1/examples/lisp.py 2006-02-18 23:12:38.000000000 +0000 +++ simpleparse-2.2.0/examples/lisp.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,65 +0,0 @@ -"""Basic LISP parser - -We use library items, so we get " strings, float, int, and hex -atoms, as well as lists. Note: Lisp doesn't seem to -use "," for seperating atoms in lists? I don't really -remember it well enough to recall, but seems to match the -samples I see. - -Note: Original grammar was from a sample in the YAPPS -documentation. Though it's kinda hard to recognise here. -""" - -definition = r""" -### A simple LISP parser - - := [ \t\n\r]* - := [-+*/!@%^&=.a-zA-Z0-9_] -quote := "'" -name := nameChar+ ->atom< := quote / string_double_quote / list / number_expr / name - -# numbers are regular number values followed -# by something that is _not_ a nameCharacter -number_expr := number, ?-(nameChar) -list := "(", seq?, ")"! 
->seq< := ts, atom, (ts,atom)*, ts -""" -from simpleparse.parser import Parser -from simpleparse.common import strings, numbers -from simpleparse.dispatchprocessor import * - -parser = Parser( definition, 'atom' ) - -if __name__ == "__main__": - from simpleparse.stt.TextTools import print_tags - - shouldParse = [ - "(+ 2 3)", - "(- 2 3)", - "(* 2 3)", - "(quote (2 3 4))", - "(23s (2.4s 3s 45.3))", - "(() () (2 3 4))", - "()", - '''("thisand that" ())''', - '"this"', - '''('"this")''', - '''("this\n\r" ' those (+ a b) (23s 0xa3 55.3) "s")''', - r'''("this\n\r" ' those (+ a b) (23s 0xa3 55.3) "s")''', - r'''("this\n\r" ' those (+ a b) (23s 0xa3 55.3] "s")''', - '''("this\n\r" ' those (+ a b) (23s 0xa3 55.3\n\n] "s")''', - '''(with-pedantry :high It's "Scheme In One Defun".)''', - - ] - import pprint - for item in shouldParse: - try: - success, children, next = parser.parse( item ) - if not success: - print 'fail', item - else: - print 'success', item, next - pprint.pprint( children ) - except SyntaxError, err: - print err diff -Nru simpleparse-2.1.0a1/examples/prebuilt_call.py simpleparse-2.2.0/examples/prebuilt_call.py --- simpleparse-2.1.0a1/examples/prebuilt_call.py 2006-02-18 23:12:47.000000000 +0000 +++ simpleparse-2.2.0/examples/prebuilt_call.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,62 +0,0 @@ -"""Example using pre-built "re" parsing object - -The Pre-built Element Token lets you include elements -which cannot be readily defined in the SimpleParse EBNF -including items defined by a callout to a Python -function. This example demonstrates the technique. - -The example also (obviously) demonstrates the use of an -re object during the parsing process. -""" -import re -from simpleparse.stt.TextTools.TextTools import * -from simpleparse.parser import Parser -from simpleparse import dispatchprocessor - -class REMatch: - """An object wrapping a regular expression with __call__ (and Call) semantics""" - def __init__( self, expression, flags=0 ): - self.matcher = re.compile( expression, flags ) - def __call__( self, text, position, endPosition ): - """Return new text position, if > position, then matched, otherwise fails""" - result = self.matcher.match( text, position, endPosition) - if result: - return result.end() - else: - # doesn't necessarily mean it went forward, merely - # that it was satisfied, which means that an optional - # satisfied but un-matched re will just get treated - # like an error :( - return position - def table( self ): - """Build the TextTools table for the object""" - return ( (None, Call, self ), ) - -declaration = r""" -v := white?,(word,white?)+ -""" - -class WordProcessor( dispatchprocessor.DispatchProcessor ): - """Processor sub-class defining processing functions for the productions""" - # you'd likely provide a "resetBeforeParse" method - # in a real-world application, but we don't store anything - # in our parser. - def word( self, tup, buffer ): - """Deal with a "word" production by printing out value""" - print "word: ", repr(dispatchprocessor.getString(tup, buffer)) - def white( self, tup, buffer ): - """Deal with a "white" production by printing out value""" - print "white:", repr(dispatchprocessor.getString(tup, buffer)) - - -parser = Parser( declaration, "v", prebuilts = [ - ("word", REMatch( "\w+").table()), - ("white", REMatch( "\W+").table()), -]) - -if __name__ == "__main__": - print """Please enter some number of words seperated by whitespace. 
-We will attempt to parse them and return the parse results""" - data = raw_input( ">>> " ) - parser.parse( data , processor = WordProcessor()) - diff -Nru simpleparse-2.1.0a1/examples/py_ebnf.py simpleparse-2.2.0/examples/py_ebnf.py --- simpleparse-2.1.0a1/examples/py_ebnf.py 2006-02-18 23:12:54.000000000 +0000 +++ simpleparse-2.2.0/examples/py_ebnf.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,90 +0,0 @@ -"""This module defines a parser for the EBNF format used to define Python's grammar - -The grammar in the language reference (as seen in Python 2.2.1) -seems to be somewhat messed up. I've tried to fix the glaring -errors (such as control codes included in the text version) and -have saved this in the examples directory. - -This parser does parse the entire (fixed) grammar, with the -exception of the style comments which were used in a few -places in the grammar to say "range of characters" - -What this doesn't do, is try to _use_ the parsed grammar. The -grammar is assuming a very different parser type than SimpleParse, -for instance, it assumes that alternation (|) will use longest-of -semantics, so that: - - int := blah - long_int := int, [lL] - all_ints := int, long_int - -Would match long_int (because it's longest), rather than int, which -is what the base SimpleParse FOGroup would do. You could fairly -trivially make a processor similar to the simpleparsegrammar one -to make objectgenerator objects from the parsed format, but the -resulting parser just wouldn't work because of the differences in -parser capability. - -Basically, we'll want to have a new back-end before continuing on -with this demo. - -The grammar being parsed (and included) is part of Python, so -here's the copyright notice: - - Python is Copyright (c) 2001, 2002 Python Software Foundation. - All Rights Reserved. - - Copyright (c) 2000 BeOpen.com. - All Rights Reserved. - - Copyright (c) 1995-2001 Corporation for National Research Initiatives. - All Rights Reserved. - - Copyright (c) 1991-1995 Stichting Mathematisch Centrum, Amsterdam. - All Rights Reserved. - -You should have a full copy of the Python license in your Python -distribution. -""" -declaration = r""" - -declarationset := declaration+ -declaration := ts, '\n'?, name ,ts,'::=',fo_group - ->group< := '(',fo_group, ')' ->fo_group_children< := (seq_group/element_token) -fo_group := ts,fo_group_children, - (ts, fo_indicator, ts, - fo_group_children - )*, ts - -seq_group := ts, element_token, (ts, element_token)+, ts - -element_token := (optional_element / base_element), repetition? 
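Beyond wrapping an re object as REMatch does above, any callable with the (text, position, endPosition) signature can be handed to a grammar through the same single-entry Call tag table; match_digits below is an illustrative helper, not part of the package:

from simpleparse.stt.TextTools.TextTools import Call
from simpleparse.parser import Parser

def match_digits(text, position, endPosition):
    """Call-style matcher: return the new position; per REMatch above, not advancing counts as a failure"""
    new = position
    while new < endPosition and text[new].isdigit():
        new += 1
    return new

declaration = "number_run := digits\n"
parser = Parser(declaration, "number_run",
                prebuilts=[("digits", ((None, Call, match_digits),))])
print(parser.parse("12345"))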
- -repetition := ('*'/'+') -optional_element := '[',fo_group,']' ->base_element< := (range/string/group/name) - := '|' - -name := [a-zA-Z_],[a-zA-Z0-9_]* - := ( - ('\n', ?-name) / - [ \011]+ / - comment -)* -comment := '#',-'\n'+,'\n' - -range := string, ts, '...', ts, string - -""" -from simpleparse.parser import Parser -from simpleparse.common import strings - -parser = Parser( declaration ) -if __name__ == "__main__": - from simpleparse.stt.TextTools import print_tags - grammar = open("""py_grammar.txt""").read() - success, result, next = parser.parse( grammar, 'declarationset') - print 'success', success, next - print_tags( grammar, result ) diff -Nru simpleparse-2.1.0a1/examples/simpleexample2_1.py simpleparse-2.2.0/examples/simpleexample2_1.py --- simpleparse-2.1.0a1/examples/simpleexample2_1.py 2002-07-07 00:47:45.000000000 +0000 +++ simpleparse-2.2.0/examples/simpleexample2_1.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,13 +0,0 @@ -from simpleparse.common import numbers, strings, comments - -declaration = r'''# note use of raw string when embedding in python code... -file := [ \t\n]*, section+ -section := '[',identifier,']', ts,'\n', body -body := statement* -statement := (ts,semicolon_comment)/equality/nullline -nullline := ts,'\n' -equality := ts, identifier,ts,'=',ts,identified,ts,'\n' -identifier := [a-zA-Z], [a-zA-Z0-9_]* -identified := string/number/identifier -ts := [ \t]* -''' diff -Nru simpleparse-2.1.0a1/examples/simpleexample2_2.py simpleparse-2.2.0/examples/simpleexample2_2.py --- simpleparse-2.1.0a1/examples/simpleexample2_2.py 2002-08-09 07:23:14.000000000 +0000 +++ simpleparse-2.2.0/examples/simpleexample2_2.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,37 +0,0 @@ -"""Re-written version of simpleexample for 2.0 - -Shows use of Parser to check syntax of declaration and -test that a particular production is matching what we -expect it to match... -""" -from simpleparse.common import numbers, strings, comments - -declaration = r'''# note use of raw string when embedding in python code... -file := [ \t\n]*, section+ -section := '[',identifier,']', ts,'\n', body -body := statement* -statement := (ts,semicolon_comment)/equality/nullline -nullline := ts,'\n' -equality := ts, identifier,ts,'=',ts,identified,ts,'\n' -identifier := [a-zA-Z], [a-zA-Z0-9_]* -identified := string/number/identifier -ts := [ \t]* -''' - -from simpleparse.parser import Parser -parser = Parser( declaration ) - -testEquality = [ - "s=3\n", - "s = 3\n", - ''' s="three\\nthere"\n''', - ''' s=three\n''', -] - -production = "equality" - -if __name__ =="__main__": - for testData in testEquality: - success, children, nextcharacter = parser.parse( testData, production=production) - assert success and nextcharacter==len(testData), """Wasn't able to parse %s as a %s (%s chars parsed of %s), returned value was %s"""%( repr(testData), production, nextcharacter, len(testData), (success, children, nextcharacter)) - \ No newline at end of file diff -Nru simpleparse-2.1.0a1/examples/simpleexample2_3.py simpleparse-2.2.0/examples/simpleexample2_3.py --- simpleparse-2.1.0a1/examples/simpleexample2_3.py 2002-08-09 07:23:14.000000000 +0000 +++ simpleparse-2.2.0/examples/simpleexample2_3.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,31 +0,0 @@ -"""Re-written version of simpleexample for 2.0""" -from simpleparse.common import numbers, strings, comments - -declaration = r'''# note use of raw string when embedding in python code... 
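The longest-of versus first-of distinction discussed in the docstring above is easy to see directly: SimpleParse alternation tries the alternatives in order and keeps the first success, so longer alternatives have to be listed first. The grammar below is illustrative:

from simpleparse.parser import Parser

decl = r'''
value    := long_int / int       # longest alternative deliberately listed first
long_int := [0-9]+, [lL]
int      := [0-9]+
'''
parser = Parser(decl, 'value')
print(parser.parse('42L'))   # consumes all three characters via long_int;
                             # written as "int / long_int" it would stop after "42"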
-file := [ \t\n]*, section+ -section := '[',identifier,']', ts,'\n', body -body := statement* -statement := (ts,semicolon_comment)/equality/nullline -nullline := ts,'\n' -equality := ts, identifier,ts,'=',ts,identified,ts,'\n' -identifier := [a-zA-Z], [a-zA-Z0-9_]* -identified := string/number/identifier -ts := [ \t]* -''' -testData = """[test1] - val=23 - val2="23" - val3 = "23\t\nskidoo\xee" - wherefore="art thou" - ; why not - log = heavy_wood - -[test2] -loose=lips -""" -from simpleparse.parser import Parser -import pprint - -parser = Parser( declaration, "file" ) -if __name__ =="__main__": - pprint.pprint( parser.parse( testData)) diff -Nru simpleparse-2.1.0a1/examples/simpleexample.py simpleparse-2.2.0/examples/simpleexample.py --- simpleparse-2.1.0a1/examples/simpleexample.py 2002-08-09 07:23:14.000000000 +0000 +++ simpleparse-2.2.0/examples/simpleexample.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,33 +0,0 @@ -declaration = r'''# note use of raw string when embedding in python code... -file := [ \t\n]*, section+ -section := '[',identifier,']', ts,'\n', body -body := statement* -statement := (ts,';',comment,'\n')/equality/nullline -nullline := ts,'\n' -comment := -'\n'* -equality := ts, identifier,ts,'=',ts,identified,ts,'\n' -identifier := [a-zA-Z], [a-zA-Z0-9_]* -identified := ('"',string,'"')/number/identifier -ts := [ \t]* -char := -[\134"]+ -number := [0-9eE+.-]+ -string := (char/escapedchar)* -escapedchar := '\134"' / '\134\134' -''' -testdata = '''[test1] - val=23 - val2="23" - wherefore="art thou" - ; why not - log = heavy_wood - -[test2] -loose=lips - -''' -from simpleparse.parser import Parser -import pprint - -parser = Parser( declaration, "file" ) -if __name__ =="__main__": - pprint.pprint( parser.parse( testdata)) diff -Nru simpleparse-2.1.0a1/examples/transformation.py simpleparse-2.2.0/examples/transformation.py --- simpleparse-2.1.0a1/examples/transformation.py 2002-08-09 06:36:02.000000000 +0000 +++ simpleparse-2.2.0/examples/transformation.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,103 +0,0 @@ -"""A simple example of parsing - -I have no idea for whom I originally created this code, -(which was originally written for SimpleParse 1.0) nor -why they wanted it. Oh well, such is life. - -Running as a script will do some timing tests, but the -tests are rather... simplistic. - -The grammar is slow parsing around 5-10% of the speed I -normally expect from SimpleParse/mxTextTools parsers. -I'm guessing it gets into lots and lots of partial parses -of the "interesting" production, and that the huge number -of reported productions slows it down. For example, -making atom non-reporting gives a 15% speedup on my -machine. 
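Since the simpleexample scripts above stop at pprint-ing the raw result tree, a small post-processing sketch may be useful. The helper names below are illustrative; the code assumes the (tag, start, stop, children) tuples produced by the grammar in simpleexample2_3.py:

def tag_text(tag_tuple, buffer):
    """Slice the matched text for one (tag, start, stop, children) tuple out of the source buffer"""
    return buffer[tag_tuple[1]:tag_tuple[2]]

def children_named(tag_tuple, name):
    """Return the direct children of a result tuple that carry the given production name"""
    return [child for child in tag_tuple[3] if child[0] == name]

def build_config(taglist, buffer):
    """Fold the top-level 'section' results into a {section: {key: value}} dictionary"""
    config = {}
    for section in taglist:
        if section[0] != 'section':
            continue
        name = tag_text(children_named(section, 'identifier')[0], buffer)
        entries = config.setdefault(name, {})
        for body in children_named(section, 'body'):
            for statement in children_named(body, 'statement'):
                for equality in children_named(statement, 'equality'):
                    key = tag_text(children_named(equality, 'identifier')[0], buffer)
                    value = tag_text(children_named(equality, 'identified')[0], buffer)
                    entries[key] = value
    return config

# usage with the parser and testData defined in simpleexample2_3.py:
#   success, taglist, next_char = parser.parse(testData)
#   print(build_config(taglist, testData))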
-""" - -declaration = r''' -set := (interesting/multset/plusset)+ -multset := '*',(set/atom), (set/atom) -plusset := '+',(set/atom), (set/atom) -atom := -[+*] - ->interesting< := (example8/example7/example6/example5/example4/example3/example2/example1) -example1 := '*+',(set/atom),(set/atom),'+',(set/atom),(set/atom) -example2 := '**',(set/atom),(set/atom),'++',(set/atom),(set/atom),(set/atom) -example3 := 'fsd*',(set/atom),(set/atom),'++',(set/atom),(set/atom),(set/atom) -example4 := 'm*',(set/atom),(set/atom),'++',(set/atom),(set/atom),(set/atom) -example5 := 'a*',(set/atom),(set/atom),'++',(set/atom),(set/atom),(set/atom) -example6 := 's*',(set/atom),(set/atom),'++',(set/atom),(set/atom),(set/atom) -example7 := 'bdf*',(set/atom),(set/atom),'++',(set/atom),(set/atom),(set/atom) -example8 := 'sd*',(set/atom),(set/atom),'++',(set/atom),(set/atom),(set/atom) -''' -import sys, string -from simpleparse.parser import Parser -parser = Parser(declaration,'set') - - -class Emitter: - def process( self, data ): - #import pprint - tree = self.parse( data ) - #pprint.pprint( tree ) - # wrap up the tuple 'cause TextTools uses a different format for the top-level :( - tree = ('set',0, tree[-1], tree[1] ) - return self.emit( tree ) - def parse( self, data ): - self.data = data - return parser.parse( data) - def write( self, data ): - sys.stdout.write( data ) - def emit( self, tree ): - ''' - return transformation for a single tuple... - ''' - if hasattr( self, 'emit' + tree[0] ): # have explicitprocessing function - func = getattr( self, 'emit'+tree[0] ) - return func( tree ) - else: - if tree[3]: # children to process, things to do :) - result = [] - ### write out pre-elements - endpos = tree[3][0][1] # start of first child - result.append( self.data[ tree[1]:endpos] ) - ### write children - for child in tree[3]: - result.append( self.emit( child ) ) - ### write out post elements - startpos = tree[3][-1][2] # end of last child - result.append( self.data[ startpos: tree[2]] ) - return string.join( result, '' ) - else: - # we're just re-emitting same text... - return self.data[ tree[1]:tree[2]] - def emitexample1( self, tuple ): - '''*+AB+CD -> ++*AC*AD+*BC*BD''' - #print 'interesting' - #import pdb - #pdb.set_trace() - a,b,c,d = map( self.emit, tuple[3] ) - #print `(a,b,c,d)`, - return '++*%s%s*%s%s+*%s%s*%s%s'%( a,c,a,d,b,c,b,d) - -if __name__ == "__main__": - - testdata = [ - '''++m*++mkp+f*nkf''', - '''*+ab+cd''', - '''+ab+bc+de''', - '''*ab*bc*de''', - '''++m*++mkp+f*nkf'''*10000, - ] - - a = Emitter() - import time, profile - for test in testdata: - t = time.time() - a.parse( test ) - t = time.time()-t - print 'total time', t, 'length', len(test) - if t: - print ' %s cps' % (len(test)/t) diff -Nru simpleparse-2.1.0a1/examples/vrml_erronfail.py simpleparse-2.2.0/examples/vrml_erronfail.py --- simpleparse-2.1.0a1/examples/vrml_erronfail.py 2002-08-09 07:07:40.000000000 +0000 +++ simpleparse-2.2.0/examples/vrml_erronfail.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,97 +0,0 @@ -"""VRML97-compliant Parser - -This example is a full VRML97 parser, originally created -for the mcf.vrml VRML-processing system. It supports all -VRML97 constructs, and should be correct for any VRML97 -content you can produce. The parser is fairly fast -(parsing around 280,000 cps on a 1GHz Athlon machine). - -This is the errorOnFail version of the grammar, otherwise -identical to the vrml.py module. 
Note: there is basically -no speed penalty for the errorOnFail version compared to -the original version, as the errorOnFail code is not touched -unless a syntax error is actually found in the input text. -""" -from simpleparse.parser import Parser -from simpleparse.common import chartypes - -#print file -VRMLPARSERDEF = r'''header := -[\n]* -vrmlFile := header, vrmlScene, EOF -rootItem := ts,(Proto/ExternProto/ROUTE/('USE',ts,USE,ts)/Script/Node),ts -vrmlScene := rootItem* - -Proto := 'PROTO',ts,!, nodegi,ts,'[',ts,(fieldDecl/eventDecl)*,']', ts, '{', ts, vrmlScene,ts, '}', ts -fieldDecl := fieldExposure,ts,!,dataType,ts,name,ts,Field,ts -fieldExposure := 'field'/'exposedField' -dataType := 'SFBool'/'SFString'/'SFFloat'/'SFTime'/'SFVec3f'/'SFVec2f'/'SFRotation'/'SFInt32'/'SFImage'/'SFColor'/'SFNode'/'MFBool'/'MFString'/'MFFloat'/'MFTime'/'MFVec3f'/'MFVec2f'/'MFRotation'/'MFInt32'/'MFColor'/'MFNode' -eventDecl := eventDirection, ts, !,dataType, ts, name, ts -eventDirection := 'eventIn'/'eventOut' -ExternProto := 'EXTERNPROTO',ts,!,nodegi,ts,'[',ts,(extFieldDecl/eventDecl)*,']', ts, ExtProtoURL -extFieldDecl := fieldExposure,ts,!,dataType,ts,name,ts -ExtProtoURL := '['?,(ts,SFString)*, ts, ']'?, ts # just an MFString by another name :) - -ROUTE := 'ROUTE',ts, !,name,'.',name, ts, 'TO', ts, name,'.',name, ts - -Node := ('DEF',ts,!,name,ts)?,nodegi,ts,'{',ts,(Proto/ExternProto/ROUTE/Attr)*,ts,!,'}', ts - -Script := ('DEF',ts,!,name,ts)?,'Script',ts,!,'{',ts,(ScriptFieldDecl/ScriptEventDecl/Proto/ExternProto/ROUTE/Attr)*,ts,'}', ts -ScriptEventDecl := eventDirection, ts, !,dataType, ts, name, ts, ('IS', ts,!, IS,ts)? -ScriptFieldDecl := fieldExposure,ts,!,dataType,ts,name,ts,(('IS', ts,!,IS,ts)/Field),ts - -SFNull := 'NULL', ts - -# should really have an optimised way of declaring a different reporting name for the same production... -USE := name -IS := name -nodegi := name -Attr := name, ts, (('IS', ts,IS,ts)/Field), ts -Field := ( '[',ts,((SFNumber/SFBool/SFString/('USE',ts,USE,ts)/Script/Node),ts)*, ']'!, ts )/((SFNumber/SFBool/SFNull/SFString/('USE',ts,USE,ts)/Script/Node),ts)+ - -name := -[][0-9{}\000-\020"'#,.\\ ], -[][{}\000-\020"'#,.\\ ]* -SFNumber := [-+]*, ( ('0',[xX],[0-9]+) / ([0-9.]+,([eE],[-+0-9.]+)?)) -SFBool := 'TRUE'/'FALSE' -SFString := '"',(CHARNODBLQUOTE/ESCAPEDCHAR/SIMPLEBACKSLASH)*,'"'! 
-CHARNODBLQUOTE := -[\134"]+ -SIMPLEBACKSLASH := '\134' -ESCAPEDCHAR := '\\"'/'\134\134' - := ( [ \011-\015,]+ / ('#',-'\012'*,'\n')+ )* -''' - -def buildVRMLParser( declaration = VRMLPARSERDEF ): - return Parser( declaration, "vrmlFile" ) - -if __name__ == "__main__": - import os, sys, time - parser = buildVRMLParser() - if sys.argv[1:]: - filename = sys.argv[1] - data = open(filename).read() - t = time.time() - success, tags, next = parser.parse( data) - d = time.time()-t - print "parsed %s characters of %s in %s seconds (%scps)"%( next, len(data), d, next/(d or 0.000000001) ) - # now show the error-generation - print '''About to parse badly formatted VRML data''' - badData = [ - '''#whatever\nX{ { } }''', - '''#whatever\nX{ S }''', - '''#whatever\nPROTO ]{ S }''', - '''#whatever\nPROTO []{ S ''', - '''#whatever\nPROTO R [ - field SFBool A -]{ }''', - '''#whatever\nPROTO R [ - field SFBool -]{ }''', - '''#whatever\nPROTO R [ - field SFBool A " -]{ ''', - ] - - for bad in badData: - try: - parser.parse( bad ) - print """\nWARNING: didn't get a syntax error for item %s\n"""%(repr(bad)) - except SyntaxError, err: - print err diff -Nru simpleparse-2.1.0a1/examples/vrml.py simpleparse-2.2.0/examples/vrml.py --- simpleparse-2.1.0a1/examples/vrml.py 2002-08-09 06:36:02.000000000 +0000 +++ simpleparse-2.2.0/examples/vrml.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,60 +0,0 @@ -"""VRML97-compliant Parser - -This example is a full VRML97 parser, originally created -for the mcf.vrml VRML-processing system. It supports all -VRML97 constructs, and should be correct for any VRML97 -content you can produce. The parser is fairly fast -(parsing around 280,000 cps on a 1GHz Athlon machine). -""" -from simpleparse.parser import Parser - -#print file -VRMLPARSERDEF = r'''header := -[\n]* -rootItem := ts,(Proto/ExternProto/ROUTE/('USE',ts,USE,ts)/Script/Node),ts -vrmlScene := rootItem* -Proto := 'PROTO',ts,nodegi,ts,'[',ts,(fieldDecl/eventDecl)*,']', ts, '{', ts, vrmlScene,ts, '}', ts -fieldDecl := fieldExposure,ts,dataType,ts,name,ts,Field,ts -fieldExposure := 'field'/'exposedField' -dataType := 'SFBool'/'SFString'/'SFFloat'/'SFTime'/'SFVec3f'/'SFVec2f'/'SFRotation'/'SFInt32'/'SFImage'/'SFColor'/'SFNode'/'MFBool'/'MFString'/'MFFloat'/'MFTime'/'MFVec3f'/'MFVec2f'/'MFRotation'/'MFInt32'/'MFColor'/'MFNode' -eventDecl := eventDirection, ts, dataType, ts, name, ts -eventDirection := 'eventIn'/'eventOut' -ExternProto := 'EXTERNPROTO',ts,nodegi,ts,'[',ts,(extFieldDecl/eventDecl)*,']', ts, ExtProtoURL -extFieldDecl := fieldExposure,ts,dataType,ts,name,ts -ExtProtoURL := '['?,(ts,SFString)*, ts, ']'?, ts # just an MFString by another name :) -ROUTE := 'ROUTE',ts, name,'.',name, ts, 'TO', ts, name,'.',name, ts -Node := ('DEF',ts,name,ts)?,nodegi,ts,'{',ts,(Proto/ExternProto/ROUTE/Attr)*,ts,'}', ts -Script := ('DEF',ts,name,ts)?,'Script',ts,'{',ts,(ScriptFieldDecl/ScriptEventDecl/Proto/ExternProto/ROUTE/Attr)*,ts,'}', ts -ScriptEventDecl := eventDirection, ts, dataType, ts, name, ts, ('IS', ts, IS,ts)? -ScriptFieldDecl := fieldExposure,ts,dataType,ts,name,ts,(('IS', ts,IS,ts)/Field),ts -SFNull := 'NULL', ts - -# should really have an optimised way of declaring a different reporting name for the same production... 
-USE := name -IS := name -nodegi := name -Attr := name, ts, (('IS', ts,IS,ts)/Field), ts -Field := ( '[',ts,((SFNumber/SFBool/SFString/('USE',ts,USE,ts)/Script/Node),ts)*, ']', ts )/((SFNumber/SFBool/SFNull/SFString/('USE',ts,USE,ts)/Script/Node),ts)+ - -name := -[][0-9{}\000-\020"'#,.\\ ], -[][{}\000-\020"'#,.\\ ]* -SFNumber := [-+]*, ( ('0',[xX],[0-9]+) / ([0-9.]+,([eE],[-+0-9.]+)?)) -SFBool := 'TRUE'/'FALSE' -SFString := '"',(CHARNODBLQUOTE/ESCAPEDCHAR/SIMPLEBACKSLASH)*,'"' -CHARNODBLQUOTE := -[\134"]+ -SIMPLEBACKSLASH := '\134' -ESCAPEDCHAR := '\\"'/'\134\134' - := ( [ \011-\015,]+ / ('#',-'\012'*,'\n')+ )* -''' -def buildVRMLParser( declaration = VRMLPARSERDEF ): - return Parser( declaration, "vrmlScene" ) - -if __name__ == "__main__": - import os, sys, time - if sys.argv[1:]: - filename = sys.argv[1] - data = open(filename).read() - parser = buildVRMLParser() - t = time.time() - success, tags, next = parser.parse( data) - d = time.time()-t - print "parsed %s characters of %s in %s seconds (%scps)"%( next, len(data), d, next/(d or 0.000000001) ) - \ No newline at end of file diff -Nru simpleparse-2.1.0a1/generator.py simpleparse-2.2.0/generator.py --- simpleparse-2.1.0a1/generator.py 2006-02-19 00:40:43.000000000 +0000 +++ simpleparse-2.2.0/generator.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,153 +0,0 @@ -"""Abstract representation of an in-memory grammar that generates parsers""" -from simpleparse.stt.TextTools import TextTools -import traceback - -class Generator: - '''Abstract representation of an in-memory grammar that generates parsers - - The generator class manages a collection of - ElementToken objects. These element token objects - allow the generator to be separated from the - particular parser associated with any particular EBNF - grammar. In fact, it is possible to create entire grammars - using only the generator objects as a python API. 
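A minimal sketch of the claim just made, that grammars can be built purely through the Python API: the element-token classes used here are defined in objectgenerator.py later in this patch, and the production name and sample text are illustrative.

from simpleparse.generator import Generator
from simpleparse import objectgenerator
from simpleparse.stt.TextTools.TextTools import tag

gen = Generator()
# roughly: greeting matches 'hello', one or more spaces, then 'world'
gen.addDefinition('greeting', objectgenerator.SequentialGroup(children=[
    objectgenerator.Literal(value='hello'),
    objectgenerator.Range(value=' ', repeating=1),
    objectgenerator.Literal(value='world'),
]))
table = gen.buildParser('greeting')
print(tag('hello   world', table))   # expected: (1, [], 13)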
- ''' - def __init__( self ): - """Initialise the Generator""" - self.names = [] - self.rootObjects = [] - self.methodSource = None - self.definitionSources = [] - def getNameIndex( self, name ): - '''Return the index into the main list for the given name''' - try: - return self.names.index( name ) - except ValueError: - - for source in self.definitionSources: - if source.has_key( name ): - return self.addDefinition( name, source[name]) -## import pdb -## pdb.set_trace() - raise NameError( '''The name %s is not defined within this generator'''%(repr(name)), self ) - def getRootObjects( self, ): - '''Return the list of root generator objects''' - return self.rootObjects - def getNames( self, ): - '''Return the list of root generator objects''' - return self.names - def getRootObject( self, name ): - """Get a particular root object by name""" - return self.getRootObjects()[ self.getNameIndex(name)] - - def addDefinition( self, name, rootElement ): - '''Add a new definition (object) to the generator''' - try: - self.names.index( name ) - raise NameError( '''Attempt to redefine an existing name %s'''%(name), self ) - except ValueError: - self.names.append( name ) - self.rootObjects.append( rootElement ) - return self.getNameIndex( name ) - def buildParser( self, name, methodSource=None ): - '''Build the given parser definition, returning a TextTools parsing tuple''' - self.parserList = [] - self.terminalParserCache = {} - self.methodSource = methodSource - i = 0 - while i < len(self.rootObjects): - # XXX Note: rootObjects will grow in certain cases where - # a grammar is loading secondary grammars into itself - rootObject = self.rootObjects[i] - try: - if len(self.parserList) <= i or self.parserList[i] is None: - parser = tuple(rootObject.toParser( self )) - self.setTerminalParser( i, parser ) - except NameError,err: - currentRuleName = self.names[i] - err.args = err.args + ('current declaration is %s'%(currentRuleName), ) - raise - i = i + 1 - assert None not in self.parserList, str( self.parserList) - return self.parserList [self.getNameIndex (name)] - def setTerminalParser( self, index, parser ): - """Explicitly set the parser value for given name""" - while index >= len(self.parserList): - self.parserList.append(None) - self.parserList[index] = parser - def getTerminalParser( self, index ): - """Try to retrieve a parser from the parser-list""" - try: - return self.parserList[ index ] - except IndexError: - return None - def cacheCustomTerminalParser( self, index, flags, parser ): - """Optimization to reuse customized terminal parsers""" - self.terminalParserCache[ (index,flags) ] = parser - def getCustomTerminalParser( self, index, flags ): - """Retrieved a cached customized terminal parser or None""" - return self.terminalParserCache.get( (index, flags)) - - def getParserList (self): - return self.parserList - - - def getObjectForName( self, name): - """Determine whether our methodSource has a parsing method for the given name - - returns ( flags or 0 , tagobject) - """ - testName = "_m_"+name - if hasattr( self.methodSource, testName): - method = getattr( self.methodSource, testName ) - if callable(method): - return TextTools.CallTag, method - elif method == TextTools.AppendMatch: - return method, name - elif method in (TextTools.AppendToTagobj, TextTools.AppendTagobj): - object = self.getTagObjectForName( name ) - if method == TextTools.AppendToTagobj: - if not ( hasattr( object, 'append') and callable(object.append)): - raise ValueError( """Method source %s declares production %s to 
use AppendToTagobj method, but doesn't given an object with an append method in _o_%s (gave %s)"""%(repr(self.methodSource), name,name, repr(object))) - return method, object - else: - raise ValueError( """Unrecognised command value %s (not callable, not one of the Append* constants) found in methodSource %s, name=%s"""%( repr(method),repr(methodSource),name)) - return 0, name - def getTagObjectForName( self, name ): - """Get any explicitly defined tag object for the given name""" - testName = "_o_"+name - if hasattr( self.methodSource, testName): - object = getattr( self.methodSource, testName ) - return object - return name - def addDefinitionSource( self, item ): - """Add a source for definitions when the current grammar doesn't supply - a particular rule (effectively common/shared items for the grammar).""" - self.definitionSources.append( item ) - - -### Compatability API -## This API exists to allow much of the code written with SimpleParse 1.0 -## to work with SimpleParse 2.0 -class GeneratorAPI1: - """Stand-in class supporting operation of SimpleParse 1.0 applications - - There was really only the one method of interest, parserbyname, - everything else was internal (and is now part of - simpleparsegrammar.py). - """ - def __init__( self, production, prebuilt=() ): - from simpleparse.parser import Parser - self.parser = Parser( production, prebuilts=prebuilt ) - def parserbyname( self, name ): - """Retrieve a tag-table by production name""" - return self.parser.buildTagger( name ) - -def buildParser( declaration, prebuiltnodes=() ): - """API 1.0 primary entry point, returns a GeneratorAPI1 instance - - That object will respond to the parserbyname API expected by - SimpleParse 1.0 applications. - """ - return GeneratorAPI1( declaration, prebuiltnodes ) - diff -Nru simpleparse-2.1.0a1/__init__.py simpleparse-2.2.0/__init__.py --- simpleparse-2.1.0a1/__init__.py 2002-08-06 03:31:39.000000000 +0000 +++ simpleparse-2.2.0/__init__.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,6 +0,0 @@ -'''Simple parsing using mxTextTools - -See the /doc subdirectory for introductory and -general documentation. See license.txt for licensing -information. (This is a BSD-licensed package). -''' diff -Nru simpleparse-2.1.0a1/license.txt simpleparse-2.2.0/license.txt --- simpleparse-2.1.0a1/license.txt 2006-02-18 23:49:50.000000000 +0000 +++ simpleparse-2.2.0/license.txt 2015-11-11 18:42:23.000000000 +0000 @@ -4,12 +4,10 @@ Copyright (c) 1997-2000, Marc-Andre Lemburg; mailto:mal@lemburg.com Copyright (c) 2000-2001, eGenix.com Software GmbH; mailto:info@egenix.com - Copyright (c) 2003-2006, Mike Fletcher; mailto:mcfletch@vrplumber.com Copyright, License & Disclaimer for SimpleParse: - © 1998-2006, Copyright by Mike C. Fletcher; All Rights Reserved. - mailto: mcfletch@users.sourceforge.net + © 1998-2015, Copyright by Contributors; All Rights Reserved. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee or royalty @@ -18,11 +16,16 @@ permission notice appear in supporting documentation or portions thereof, including modifications, that you make. - THE AUTHOR MIKE C. 
FLETCHER DISCLAIMS ALL WARRANTIES WITH REGARD + THE CONTRIBUTORS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF - MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE + MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE! + +Contributors: + + Mike C. Fletcher + Anthony Tuininga diff -Nru simpleparse-2.1.0a1/MANIFEST.in simpleparse-2.2.0/MANIFEST.in --- simpleparse-2.1.0a1/MANIFEST.in 2006-02-19 01:59:10.000000000 +0000 +++ simpleparse-2.2.0/MANIFEST.in 2015-11-11 18:57:04.000000000 +0000 @@ -1,10 +1,12 @@ include MANIFEST.in include license.txt +include tox.ini include setup.py recursive-include . *.py -recursive-include stt * +recursive-include simpleparse/stt * recursive-include doc *.html recursive-include doc *.css +recursive-include tests *.py prune examples/html.py prune examples/py* prune examples/rtf* diff -Nru simpleparse-2.1.0a1/objectgenerator.py simpleparse-2.2.0/objectgenerator.py --- simpleparse-2.1.0a1/objectgenerator.py 2006-02-18 23:13:11.000000000 +0000 +++ simpleparse-2.2.0/objectgenerator.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,781 +0,0 @@ -"""Object-oriented tag-table generator objects - -The objectgenerator module is the core of the SimpleParse -system, the various element token classes defined here -implement transitions from EBNF-style abstractions into -the low-level (assembly-like) instructions to the -TextTools engine. - -Each class within the module is a sub-class of ElementToken, -which provides a number of common facilities, the most -obvious of which is the permute method, which takes care of -the negative, optional, and repeating flags for the normal -case (with character ranges and literals being non-normal). -""" -from simpleparse.stt.TextTools.TextTools import * - -### Direct use of BMS is deprecated now... -try: - TextSearch -except NameError: - TextSearch = BMS - -from simpleparse.error import ParserSyntaxError -import copy - -class ElementToken: - """Abstract base class for all ElementTokens - - Common Attributes: - - negative -- the element token should match - a character if the "base" definition - would not match at the current position - optional -- the element token will match even - if the base definition would not match - at the current position - repeating -- if the element is successfully - matched, attempt to match it again. - lookahead -- if true, the scanning position - of the engine will be reset after the - element matches - errorOnFail -- if true, the engine will call the - object stored in errorOnFail as a text- - matching object iff the element token fails - to match. This is used to signal - SyntaxErrors. 
- - Attributes only used for top-level Productions: - - report -- if true, the production's results - will be added to the result tree - expanded -- if true, the production's children's - results will be added to the result tree - but the production's own result will be ignored - """ - negative = 0 - optional = 0 - repeating = 0 - report = 1 - # note that optional and errorOnFail are mutually exclusive - errorOnFail = None - # any item may be marked as expanded, - # which says that it's a top-level declaration - # and that links to it should automatically expand - # as if the name wasn't present... - expanded = 0 - lookahead = 0 - - - def __init__( self, **namedarguments ): - """Initialize the object with named attributes - - This method simply takes the named attributes and - updates the object's dictionary with them - """ - self.__dict__.update( namedarguments ) - def toParser( self, generator, noReport=0 ): - """Abstract interface for implementing the conversion to a text-tools table - - generator -- an instance of generator.Generator - which provides various facilities for discovering - other productions. - noReport -- if true, we're being called recursively - for a terminal grammar fragment where one of our - parents has explicitly suppressed all reporting. - - This method is called by the generator or by - another element-token's toParser method. - """ - raise NotImplementedError( '''Element token generator abstract function called''' ) - def permute( self, basetable ): - '''Given a positive, required, non-repeating table, convert to appropriately configured table - - This method applies generic logic for applying the - operational flags to a basic recipe for an element. - - It is normally called from the elements-token's own - toParser method. - ''' - flags = 0 - if self.lookahead: - flags = flags + LookAhead - - assert len(basetable) == 3, '''Attempt to permute a base table that already has fail flag set, can only permute unadorned tables''' - if self.negative: - # negative "matches" if it fails - # we add in the flags while we're at it... - basetable = (None, SubTable+flags, ( - basetable + (1,2), - (None, EOF, Here,2,1), # if we hit eof, this didn't match, otherwise, we matched - (None, Fail, Here),# either hit eof or matched the client - (None,Skip,1), - )) - elif flags: - # unpack, add the flags, and repack - tag, command, arg = basetable - basetable = ( tag, command+flags, arg) - - if self.repeating: - ### There are a number of problems with repetition that we'd like to solve - ### via recursive table calls, but those are very expensive in the current - ### implementation, so we need to use something a little more hacky... - if self.optional: - return [ - ## this would be the "simplistic" implementation... - ## basetable + (1,0) - ## it doesn't work because of cases - ## where all-optional children "succeed" without consuming - ## when within a repeating parent - ## the EOF test isn't enough to fix the problem, - ## as it's only checking a common case, not the underlying failure - basetable +(2,1), # fail, done, succeed, check for eof and if not, try matching again - # if we hit eof, no chance of further matches, - # consider ourselves done - (None, EOF, Here,-1,1), - ] - elif self.errorOnFail: - return [ - basetable+(1,2), - (None, Call, self.errorOnFail), - # as for optional... - basetable +(2,1), - (None, EOF, Here,-1,1), - ] - else: - return [ - basetable, - # as for optional... 
- basetable +(2,1), - (None, EOF, Here,-1,1), - ] - else: # single - if self.optional: - return [ - basetable +(1,1) - ] - elif self.errorOnFail: - return [ - basetable+(1,2), - (None, Call, self.errorOnFail), - ] - else: # not optional - return [ - basetable - ] - def __repr__( self): - """Return a readily recognisable version of ourself""" - from simpleparse import printers - return printers.asObject( self ) - def terminal (self, generator): - """Determine if this element is terminal for the generator""" - return 0 - - -class Literal( ElementToken ): - """Literal string value to be matched - - Literals are one of the most common elements within - any grammar. The implementation tries to use the - most efficient mechanism available for matching/searching - for a literal value, so the Literal class does not - use the permute method, instead defining explicit - parsing methodologies for each flag and value combination - - Literals in the SimpleParse EBNF grammar are defined like so: - "test", "test"?, "test"*, "test"+ - -"test", -"test"?, -"test"*, -"test"+ - - Attributes: - value -- a string storing the literal's value - - Notes: - Currently we don't support Unicode literals - - See also: - CILiteral -- case-insensitive Literal values - """ - value = "" - def toParser( self, generator=None, noReport=0 ): - """Create the parser for the element token""" - flags = 0 - if self.lookahead: - flags = flags + LookAhead - base = self.baseToParser( generator ) - if flags or self.errorOnFail: - if self.errorOnFail: - return [(None, SubTable+flags, tuple(base),1,2),(None, Call, self.errorOnFail)] - else: - return [(None, SubTable+flags, tuple(base))] - else: - return base - def baseToParser( self, generator=None ): - """Parser generation without considering flag settings""" - svalue = self.value - if self.negative: - if self.repeating: # a repeating negative value, a "search" in effect - if self.optional: # if fails, then go to end of file - return [ (None, sWordStart, TextSearch( svalue ),1,2), (None, Move, ToEOF ) ] - else: # must first check to make sure the current position is not the word, then the same - return [ - (None, Word, svalue, 2,1), - (None, Fail, Here), - (None, sWordStart, TextSearch( svalue ),1,2), - (None, Move, ToEOF ) - ] - #return [ (None, Word, svalue, 2,1),(None, Fail, Here),(None, WordStart, svalue,1,2), (None, Move, ToEOF ) ] - else: # a single-character test saying "not a this" - if self.optional: # test for a success, move back if success, move one forward if failure - if len(svalue) > 1: - return [ (None, Word, svalue, 2,1), - (None, Skip, -len(svalue), 2,2), # backup if this was the word to start of word, succeed - (None, Skip, 1 ) ] # else just move one character and succeed - else: # Uses Is test instead of Word test, should be faster I'd imagine - return [ (None, Is, svalue, 2,1), - (None, Skip, -1, 2,2), # backtrack - (None, Skip, 1 ) ] # else just move one character and succeed - else: # must find at least one character not part of the word, so - if len(svalue) > 1: - return [ (None, Word, svalue, 2,1), - (None, Fail, Here), - (None, Skip, 1 ) ] # else just move one character and succeed - else: #must fail if it finds or move one forward - return [ (None, Is, svalue, 2,1), - (None, Fail, Here), - (None, Skip, 1 ) ] # else just move one character and succeed - else: # positive - if self.repeating: - if self.optional: - if len(svalue) > 1: - return [ (None, Word, svalue, 1,0) ] - else: - return [ (None, Is, svalue, 1,0) ] - else: # not optional - if len(svalue) > 1: - 
return [ (None, Word, svalue),(None, Word, svalue,1,0) ] - else: - return [ (None, Is, svalue),(None, Is, svalue,1,0) ] - else: # not repeating - if self.optional: - if len(svalue) > 1: - return [ (None, Word, svalue, 1,1) ] - else: - return [ (None, Is, svalue, 1,1) ] - else: # not optional - if len(svalue) > 1: - return [ (None, Word, svalue) ] - else: - return [ (None, Word, svalue) ] - def terminal (self, generator): - """Determine if this element is terminal for the generator""" - return 1 - -class _Range( ElementToken ): - """Range of character values where any one of the characters may match - - The Range token allows you to define a set of characters - (using a mini-grammar) of which any one may match. By using - the repetition flags, it is possible to easily create such - common structures as "names" and "numbers". For example: - - name := [a-zA-Z]+ - number := [0-9.eE]+ - - (Note: those are not beautifully defined examples :) ). - - The mini-grammar for the simpleparsegrammar is defined as follows: - - '[',CHARBRACE?,CHARDASH?, (CHARRANGE/CHARNOBRACE)*, CHARDASH?,']' - - that is, if a literal ']' character is wanted, you must - define the character as the first item in the range. A literal - '-' character must appear as the first character after any - literal ']' character (or the beginning of the range) or as the - last character in the range. - - Note: The expansion from the mini-grammar occurs before the - Range token is created (the simpleparse grammar does the - expansion), so the value attribute of the token is actually - the expanded string of characters. - """ - value = "" - requiresExpandedSet = 1 - def toParser( self, generator=None, noReport=0 ): - """Create the parser for the element token""" - flags = 0 - if self.lookahead: - flags = flags + LookAhead - base = self.baseToParser( generator ) - if flags or self.errorOnFail: - if self.errorOnFail: - return [(None, SubTable+flags, tuple(base),1,2),(None, Call, self.errorOnFail)] - else: - return [(None, SubTable+flags, tuple(base))] - else: - return base - -# this should be a faster and more generic character set -# approach, but there's a bug with mxTextTools b3 which makes -# it non-functional, so for now I'm using the old version. -# Eventually this should also support the Unicode character sets -##try: -## CharSet -## class Range( _Range ): -## """Range type using the CharSet feature of mx.TextTools 2.1.0 -## -## The CharSet type allows for both Unicode and 256-char strings, -## so we can use it as our 2.1.0 primary parsing mechanism. -## It also allows for simpler definitions (doesn't require that -## we pre-exand the character set). That's going to require support -## in the SimpleParse grammar, of course. 
-## """ -## requiresExpandedSet = 0 -## def baseToParser( self, generator=None ): -## """Parser generation without considering flag settings""" -## svalue = self.value -## print 'generating range for ', repr(svalue) -## if not svalue: -## raise ValueError( '''Range defined with no member values, would cause infinite loop %s'''%(self)) -## if self.negative: -## svalue = '^' + svalue -## print ' generated', repr(svalue) -## svalue = CharSet(svalue) -## if self.repeating: -## if self.optional: -## return [ (None, AllInCharSet, svalue, 1 ) ] -## else: # not optional -## #return [ (None, AllInSet, svalue ) ] -## return [ (None, AllInCharSet, svalue ) ] -## else: # not repeating -## if self.optional: -## #return [ (None, IsInSet, svalue, 1 ) ] -## return [ (None, IsInCharSet, svalue, 1 ) ] -## else: # not optional -## #return [ (None, IsInSet, svalue ) ] -## return [ (None, IsInCharSet, svalue ) ] -##except NameError: -class Range( _Range ): - """Range type which doesn't use the CharSet features in mx.TextTools - - This is likely to be much slower than the CharSet version (below), and - is unable to handle unicode character sets. However, it will work with - TextTools 2.0.3, which may be needed in some cases. - """ - def baseToParser( self, generator=None ): - """Parser generation without considering flag settings""" - svalue = self.value - if not svalue: - raise ValueError( '''Range defined with no member values, would cause infinite loop %s'''%(self)) - if self.negative: - if self.repeating: - if self.optional: - #return [ (None, AllInSet, svalue, 1 ) ] - return [ (None, AllNotIn, svalue, 1 ) ] - else: # not optional - #return [ (None, AllInSet, svalue ) ] - return [ (None, AllNotIn, svalue ) ] - else: # not repeating - if self.optional: - #return [ (None, IsInSet, svalue, 1 ) ] - return [ (None, IsNotIn, svalue, 1 ) ] - else: # not optional - #return [ (None, IsInSet, svalue ) ] - return [ (None, IsNotIn, svalue ) ] - else: - if self.repeating: - if self.optional: - #return [ (None, AllInSet, svalue, 1 ) ] - return [ (None, AllIn, svalue, 1 ) ] - else: # not optional - #return [ (None, AllInSet, svalue ) ] - return [ (None, AllIn, svalue ) ] - else: # not repeating - if self.optional: - #return [ (None, IsInSet, svalue, 1 ) ] - return [ (None, IsIn, svalue, 1 ) ] - else: # not optional - #return [ (None, IsInSet, svalue ) ] - return [ (None, IsIn, svalue ) ] - def terminal (self, generator): - """Determine if this element is terminal for the generator""" - return 1 - -class Group( ElementToken ): - """Abstract base class for all group element tokens - - The primary feature of a group is that it has a set - of element tokens stored in the attribute "children". - """ - children = () - terminalValue = None - def terminal (self, generator): - """Determine if this element is terminal for the generator""" - if self.terminalValue in (0,1): - return self.terminalValue - self.terminalValue = 0 - for item in self.children: - if not item.terminal( generator): - return self.terminalValue - self.terminalValue = 1 - return self.terminalValue - -class SequentialGroup( Group ): - """A sequence of element tokens which must match in a particular order - - A sequential group must match each child in turn - and all children must be satisfied to consider the - group matched. - - Within the simpleparsegrammar, the sequential group - is defined like so: - ("a", b, c, "d") - i.e. a series of comma-separated element token definitions. 
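The flag handling described for ElementToken above (negative/optional/repeating) can be inspected by asking an element token for the recipe it generates; the two Range instances below are illustrative:

from simpleparse import objectgenerator

# the EBNF  [0-9]+  reaches Range as the already-expanded character set
digits = objectgenerator.Range(value='0123456789', repeating=1)
print(digits.toParser())       # a single AllIn entry for the set

# the EBNF  -[0-9]*  (skip a run of non-digits) flips the recipe to AllNotIn
not_digits = objectgenerator.Range(value='0123456789', negative=1, repeating=1, optional=1)
print(not_digits.toParser())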
- """ - def toParser( self, generator=None, noReport=0 ): - elset = [] - for child in self.children: - elset.extend( child.toParser( generator, noReport ) ) - basic = self.permute( (None, SubTable, tuple( elset)) ) - if len(basic) == 1: - first = basic[0] - if len(first) == 3 and first[0] is None and first[1] == SubTable: - return tuple(first[2]) - return basic - -class CILiteral( SequentialGroup ): - """Case-insensitive Literal values - - The CILiteral is a sequence of literal and - character-range values, where each element is - positive and required. Literal values are - composed of those characters which are not - upper-case/lower-case pairs, while the ranges - are all two-character ranges with the upper - and lower forms. - - CILiterals in the SimpleParse EBNF grammar are defined like so: - c"test", c"test"?, c"test"*, c"test"+ - -c"test", -c"test"?, -c"test"*, -c"test"+ - - Attributes: - value -- a string storing the literal's value - - Notes: - Currently we don't support Unicode literals - - A CILiteral will be *much* slower than a - regular literal or character range - """ - value = "" - def toParser( self, generator=None, noReport=0 ): - elset = self.ciParse( self.value ) - if len(elset) == 1: - # XXX should be compressing these out during optimisation... - # pointless declaration of case-insensitivity, - # or a single-character value - pass - basic = self.permute( (None, SubTable, tuple( elset)) ) - if len(basic) == 1: - first = basic[0] - if len(first) == 3 and first[0] is None and first[1] == SubTable: - return tuple(first[2]) - return basic - def ciParse( self, value ): - """Break value into set of case-dependent groups...""" - def equalPrefix( a,b ): - for x in range(len(a)-1): - if a[x] != b[x]: - return x - result = [] - a,b = value.upper(), value.lower() - while a and b: - # is there an equal literal run at the start? - stringPrefix = equalPrefix( a,b ) - if stringPrefix: - result.append( (None, Word, a[:stringPrefix]) ) - a,b = a[stringPrefix:],b[stringPrefix:] - # if we hit the end of the string, that's fine, just return - if not a and b: - break - # otherwise, the next character must be a case-differing pair - result.append( (None, IsIn, a[0]+b[0]) ) - a,b = a[1:], b[1:] - return result - - -class ErrorOnFail(ElementToken): - """When called as a matching function, raises a SyntaxError - - Attributes: - expected -- list of strings describing expected productions - production -- string name of the production that's failing to parse - message -- overrides default message generation if non-null - - - (something,something)+! - (something,something)! 
- (something,something)+!"Unable to parse somethings in my production" - (something,something)!"Unable to parse somethings in my production" - - if string -> give an explicit message (with optional % values) - else -> use a default string - - """ - production = "" - message = "" - expected = "" - def __call__( self, text, position, end ): - """Method called by mxTextTools iff the base production fails""" - error = ParserSyntaxError( self.message ) - error.message = self.message - error.production = self.production - error.expected= self.expected - error.buffer = text - error.position = position - raise error - def copy( self ): - import copy - return copy.copy( self ) - - - - -class FirstOfGroup( Group ): - """Set of tokens that matches (and stops searching) with the first successful child - - A FirstOf group attempts to match each child in turn, - declaring success with the first successful child, - or failure if none of the children match. - - Within the simpleparsegrammar, the FirstOf group - is defined like so: - ("a" / b / c / "d") - i.e. a series of slash-separated element token definitions. - """ - def toParser( self, generator=None, noReport=0 ): - elset = [] - # should catch condition where a child is optional - # and we are repeating (which causes a crash during - # parsing), but doing so is rather complex and - # requires analysis of the whole grammar. - for el in self.children: - assert not el.optional, """Optional child of a FirstOf group created, this would cause an infinite recursion in the engine, child was %s"""%el - dataset = el.toParser( generator, noReport ) - if len( dataset) == 1:# and len(dataset[0]) == 3: # we can alter the jump states with impunity - elset.append( dataset[0] ) - else: # for now I'm eating the inefficiency and doing an extra SubTable for all elements to allow for easy calculation of jumps within the FO group - elset.append( (None, SubTable, tuple( dataset )) ) - - procset = [] - for i in range( len( elset) -1): # note that we have to treat last el specially - procset.append( elset[i] + (1,len(elset)-i) ) # if success, jump past end - procset.append( elset[-1] ) # will cause a failure if last element doesn't match - procset = tuple(procset) - - basetable = (None, SubTable, procset ) - return self.permute( basetable ) - -class Prebuilt( ElementToken ): - """Holder for pre-built TextTools tag tables - - You can pass in a Pre-built tag table when - creating your grammar, doing so creates - Prebuilt element tokens which can be referenced - by the other element tokens in your grammar. - """ - value = () - def toParser( self, generator=None, noReport=0 ): - return self.value -class LibraryElement( ElementToken ): - """Holder for a prebuilt item with it's own generator""" - generator = None - production = "" - methodSource = None - def toParser( self, generator=None, noReport=0 ): - if self.methodSource is None: - source = generator.methodSource - else: - source = self.methodSource - basetable = self.generator.buildParser( self.production, source ) - try: - if type(basetable[0]) == type(()): - if len(basetable) == 1 and len(basetable[0]) == 3: - basetable = basetable[0] - else: - # this is a table that got returned! - basetable = (None, SubTable, basetable) - return self.permute( basetable ) - except: - print basetable - raise - -class Name( ElementToken ): - """Reference to another rule in the grammar - - The Name element token allows you to reference another - production within the grammar. 
There are three major - sub-categories of reference depending on both the Name - element token and the referenced table's values. - - if the Name token's report attribute is false, - or the target table's report attribute is false, - or the Name token negative attribute is true, - the Name reference will report nothing in the result tree - - if the target's expand attribute is true, however, - the Name reference will report the children - of the target production without reporting the - target production's results (SubTable match) - - finally: - if the target is not expanded and the Name token - should report something, the generator object is - asked to supply the tag object and flags for - processing the results of the target. See the - generator.MethodSource documentation for details. - - Notes: - expanded and un-reported productions won't get any - methodsource methods called when - they are finished, that's just how I decided to - do it, not sure if there's some case where you'd - want it. As a result, it's possible to have a - method getting called for one instance (where a - name ref is reporting) and not for another (where - the name ref isn't reporting). - """ - value = "" - # following two flags are new ideas in the rewrite... - report = 1 - def toParser( self, generator, noReport=0 ): - """Create the table for parsing a name-reference - - Note that currently most of the "compression" optimisations - occur here. - """ - sindex = generator.getNameIndex( self.value ) - command = TableInList - target = generator.getRootObjects()[sindex] - - reportSelf = ( - (not noReport) and # parent hasn't suppressed reporting - self.report and # we are not suppressing ourselves - target.report and # target doesn't suppress reporting - (not self.negative) and # we aren't a negation, which doesn't report anything by itself - (not target.expanded) # we don't report the expanded production - ) - reportChildren = ( - (not noReport) and # parent hasn't suppressed reporting - self.report and # we are not suppressing ourselves - target.report and # target doesn't suppress reporting - (not self.negative) # we aren't a negation, which doesn't report anything by itself - ) - if reportSelf: - svalue = self.value - else: - svalue = None - - flags = 0 - if target.expanded: - # the target is the root of an expandedname declaration - # so we need to do special processing to make sure that - # it gets properly reported... - command = SubTableInList - tagobject = None - # check for indirected reference to another name... - elif not reportSelf: - tagobject = svalue - else: - flags, tagobject = generator.getObjectForName( svalue ) - if flags: - command = command | flags - if tagobject is None and not flags: - if self.terminal(generator): - if extractFlags(self,reportChildren) != extractFlags(target): - composite = compositeFlags(self,target, reportChildren) - partial = generator.getCustomTerminalParser( sindex,composite) - if partial is not None: - return partial - partial = tuple(copyToNewFlags(target, composite).toParser( - generator, - not reportChildren - )) - generator.cacheCustomTerminalParser( sindex,composite, partial) - return partial - else: - partial = generator.getTerminalParser( sindex ) - if partial is not None: - return partial - partial = tuple(target.toParser( - generator, - not reportChildren - )) - generator.setTerminalParser( sindex, partial) - return partial - # base, required, positive table... 
- if ( - self.terminal( generator ) and - (not flags) and - isinstance(target, (SequentialGroup,Literal,Name,Range)) - ): - partial = generator.getTerminalParser( sindex ) - if partial is None: - partial = tuple(target.toParser( - generator, - #not reportChildren - )) - generator.setTerminalParser( sindex, partial) - if len(partial) == 1 and len(partial[0]) == 3 and ( - partial[0][0] is None or tagobject is None - ): - # there is a single child - # it doesn't report anything, or we don't - partial = (partial[0][0] or tagobject,)+ partial[0][1:] - else: - partial = (tagobject, Table, tuple(partial)) - return self.permute( partial ) - basetable = ( - tagobject, - command, ( - generator.getParserList (), - sindex, - ) - ) - return self.permute( basetable ) - terminalValue = None - def terminal (self, generator): - """Determine if this element is terminal for the generator""" - if self.terminalValue in (0,1): - return self.terminalValue - self.terminalValue = 0 - target = generator.getRootObject( self.value ) - if target.terminal( generator): - self.terminalValue = 1 - return self.terminalValue - - -def extractFlags( item, report=1 ): - """Extract the flags from an item as a tuple""" - return ( - item.negative, - item.optional, - item.repeating, - item.errorOnFail, - item.lookahead, - item.report and report, - ) -def compositeFlags( first, second, report=1 ): - """Composite flags from two items into overall flag-set""" - result = [] - for a,b in map(None, extractFlags(first, report), extractFlags(second, report)): - result.append( a or b ) - return tuple(result) -def copyToNewFlags( target, flags ): - """Copy target using combined flags""" - new = copy.copy( target ) - for name,value in map(None, - ("negative","optional","repeating","errorOnFail","lookahead",'report'), - flags, - ): - setattr(new, name,value) - return new diff -Nru simpleparse-2.1.0a1/parser.py simpleparse-2.2.0/parser.py --- simpleparse-2.1.0a1/parser.py 2006-02-19 00:42:20.000000000 +0000 +++ simpleparse-2.2.0/parser.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,49 +0,0 @@ -"""Real-world parsers using the SimpleParse EBNF""" -from simpleparse import baseparser, simpleparsegrammar, common - -class Parser( baseparser.BaseParser ): - """EBNF-generated Parsers with results-handling - - The Parser is a two-stage object: - Passed an EBNF definition during initialisation, - it compiles the definition into a tagging table - (which in turn requires creating a tagging table - for parsing the EBNF). - - You then call the parser's parse method to - perform the actual parsing of your data, with the - parser passing the results to your processor object - and then back to you. 
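A minimal sketch of that two-stage flow (declaration and production names invented for illustration):

    from simpleparse.parser import Parser

    declaration = r'''
    ints := (int, ','?)+
    int  := [0-9]+
    '''
    # stage one: compile the EBNF declaration into a tagging table
    parser = Parser(declaration, root='ints')
    # stage two: run the table over data, optionally with a processor
    success, children, nextchar = parser.parse('12,34,56')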
- """ - def __init__( - self, declaration, root='root', - prebuilts=(), - definitionSources=common.SOURCES, - ): - """Initialise the parser, creating the tagging table for it - - declaration -- simpleparse ebnf declaration of the language being parsed - root -- root production used for parsing if none explicitly specified - prebuilts -- sequence of (name,value) tuples with prebuilt tables, values - can be either objectgenerator EventToken sub-classes or TextTools - tables - definitionSources -- dictionaries of common constructs for use - in building your grammar - """ - self._rootProduction = root - self._declaration = declaration - self._generator = simpleparsegrammar.Parser( - declaration, prebuilts, - definitionSources = definitionSources, - ).generator - def buildTagger( self, production=None, processor=None): - """Get a particular parsing table for a particular production""" - if production is None: - production = self._rootProduction - if processor is None: - processor = self.buildProcessor() - return self._generator.buildParser( - production, - methodSource=processor, - ) - diff -Nru simpleparse-2.1.0a1/PKG-INFO simpleparse-2.2.0/PKG-INFO --- simpleparse-2.1.0a1/PKG-INFO 2006-02-19 01:59:20.000000000 +0000 +++ simpleparse-2.2.0/PKG-INFO 2015-11-11 19:05:34.000000000 +0000 @@ -1,6 +1,6 @@ -Metadata-Version: 1.0 +Metadata-Version: 1.1 Name: SimpleParse -Version: 2.1.0a1 +Version: 2.2.0 Summary: A Parser Generator for Python (w/mxTextTools derivative) Home-page: http://simpleparse.sourceforge.net/ Author: Mike C. Fletcher @@ -10,12 +10,11 @@ Provides a moderately fast parser generator for use with Python, includes a forked version of the mxTextTools text-processing library - modified to eliminate recursive operation and fix a number of + modified to eliminate recursive operation and fix a number of undesirable behaviours. Converts EBNF grammars directly to single-pass parsers for many largely deterministic grammars. - Keywords: parse,parser,parsing,text,ebnf,grammar,generator Platform: Any Classifier: Programming Language :: Python diff -Nru simpleparse-2.1.0a1/printers.py simpleparse-2.2.0/printers.py --- simpleparse-2.1.0a1/printers.py 2006-02-19 00:43:23.000000000 +0000 +++ simpleparse-2.2.0/printers.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,60 +0,0 @@ -"""Utility to print Python code for a given generator object's element tokens""" -import string -class _GeneratorFormatter: - """Singleton Class to give a generator's element tokens as a source string - - Call this as: - printers.asGenerator( generator ) to get a Python source string - that tries to recreate the generator as a set of objectgenerator - element token objects (as seen in simpleparsegrammar). - """ - HEAD = """from simpleparse import generator -from simpleparse.objectgenerator import * -GENERATOR = generator.Generator () - -class Parser: - '''Mix-in class for simpleparse.parser.Parser which uses this GENERATOR - to build tagging tables. 
You'll likely want to override __init__ to avoid - building a new parser from a grammar (or subclass BaseParser instead of - Parser) - ''' - def buildTagger( self, name=None, processor = None ): - '''Build the tag-table for parsing the EBNF for this parser''' - return GENERATOR.buildParser( name, processor ) - -""" - ITEM = """GENERATOR.addDefinition( - %(name)s, - %(element)s, -) -""" - def __call__( self, generator ): - temp = [self.HEAD] - for name,element in map(None, generator.getNames(), generator.getRootObjects()): - name = repr(name) - element = self.reprObject(element,1) - temp.append( self.ITEM%locals()) - return string.join( temp, "") - def reprObject( self, obj, depth=0, indent=' ' ): - """Return a recognisable version of an objectgenerator element token""" - argTemplate = (indent*(depth+1))+"%s = %s," - temp = ["""%s("""%(obj.__class__.__name__)] - for key,value in obj.__dict__.items(): - if key == 'children': - childTemplate = (indent*(depth+2)) + '%s,' - childTemp = ["["] - for child in value: - childTemp.append(childTemplate%self.reprObject(child,depth+2)) - childTemp.append( (indent*(depth+1))+']' ) - - temp.append( - argTemplate% (key, string.join(childTemp, '\n')) - ) - else: - temp.append( argTemplate%( key, repr(value))) - temp.append( (indent*depth)+')') - return string.join( temp,'\n') - -asGenerator = _GeneratorFormatter() -asObject = asGenerator.reprObject - diff -Nru simpleparse-2.1.0a1/processor.py simpleparse-2.2.0/processor.py --- simpleparse-2.1.0a1/processor.py 2006-02-19 00:45:04.000000000 +0000 +++ simpleparse-2.2.0/processor.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,49 +0,0 @@ -"""Definitions of the MethodSource and Processor APIs""" - -class MethodSource: - """Base class for MethodSource objects (including Processors and Parsers) - Most applications will use either Processor or Parser objects, rather - than directly using a MethodSource object. - - The MethodSource is basically just a generic object whose attributes - are accessed during generation and/or post-processing of parse results. - The following are the special attribute forms for use in - - _m_productionname -- alters the method used in the TextTools - engine for storing results. If this is a callable object, - then call the object with: - object( taglist,text,l,r,subtags ) - - If it is TextTools.AppendToTagobj, then append the result - tuple to the associated object (_o_productionname). This - requires that _o_productionname have an "append" method, - obviously. - - If it is the constant TextTools.AppendMatch, then append - the string value which matched the production. - - If it is TextTools.AppendTagobj, then append the associated - tagobject itself to the results tree. - - _o_productionname -- with AppendToTagobj, AppendTagobj and - cases where there is no _m_productionname defined, this - allows you to provide an explicit tagobject for reporting - in the results tree/getting called with results. - """ - - - -class Processor(MethodSource): - """Provides definition of a generic processing API - - Basically, a Processor has a method __call__ which takes - two arguments, a value (which is either a 3-tuple or a 4-tuple - depending on whether a top-level production is being processed), - and a pointer to the buffer being parsed. 
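A minimal sketch of that call signature (class name invented for illustration):

    from simpleparse.processor import Processor

    class ListMatches(Processor):
        """Illustrative processor returning only the child match-list"""
        def __call__(self, value, buffer):
            # for a top-level parse, value is (success, children, nextPosition)
            success, children, next_position = value
            return children

    # handed to a parser as: parser.parse(data, processor=ListMatches())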
- """ - def __call__( self, value, buffer ): - """Process the results of a parsing run over buffer""" - return value - def __repr__( self ): - """Return a representation of the class""" - return "<%s object @ %s>"%( self.__class__.__name__, id(self)) diff -Nru simpleparse-2.1.0a1/setup.cfg simpleparse-2.2.0/setup.cfg --- simpleparse-2.1.0a1/setup.cfg 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/setup.cfg 2015-11-11 19:05:34.000000000 +0000 @@ -0,0 +1,5 @@ +[egg_info] +tag_build = +tag_date = 0 +tag_svn_revision = 0 + diff -Nru simpleparse-2.1.0a1/setup.py simpleparse-2.2.0/setup.py --- simpleparse-2.1.0a1/setup.py 2006-02-19 01:49:29.000000000 +0000 +++ simpleparse-2.2.0/setup.py 2015-11-11 18:42:23.000000000 +0000 @@ -2,85 +2,66 @@ """Installs SimpleParse using distutils Run: - python setup.py install + python setup.py install to install the packages from the source archive. """ -from distutils.command.install_data import install_data -from distutils.core import Extension -from distutils.sysconfig import * -from distutils.core import setup -import os, sys, string +try: + from setuptools import setup, Extension +except ImportError as err: + from distutils.core import setup, Extension +import os, sys + +def findVersion( ): + a = {} + exec( open( os.path.join( 'simpleparse', '__init__.py') ).read(), a, a ) + return a['__version__'] def isPackage( filename ): - return os.path.isdir(filename) and os.path.isfile( os.path.join(filename,'__init__.py')) + """Is the given filename a Python package""" + return ( + os.path.isdir(filename) and + os.path.isfile( os.path.join(filename,'__init__.py')) + ) def packagesFor( filename, basePackage="" ): - """Find all packages in filename""" - set = {} - for item in os.listdir(filename): - dir = os.path.join(filename, item) - if string.lower(item) != 'cvs' and isPackage( dir ): - if basePackage: - moduleName = basePackage+'.'+item - else: - moduleName = item - set[ moduleName] = dir - set.update( packagesFor( dir, moduleName)) - return set -def npFilesFor( dirname, baseDirectory=None ): - """Return all non-python-file filenames in dir""" - result = [] - allResults = [] - badExtensions = ( - '.py','.pyc','.pyo', '.scc','.exe','.zip','.gz', '.def','.so', - '.c','.h','.pkg','.in', - ) - for name in os.listdir(dirname): - path = os.path.join( dirname, name ) - if os.path.isfile( path) and string.lower(os.path.splitext( name )[1]) not in badExtensions: - result.append( path ) - elif os.path.isdir( path ) and name.lower() !='cvs': - allResults.extend( npFilesFor(path,baseDirectory)) - if result: - if baseDirectory is not None: - dirname = os.path.join( baseDirectory, dirname ) - allResults.append( (dirname, result)) - return allResults - -############## -## Following is from Pete Shinners, -## apparently it will work around the reported bug on -## some unix machines where the data files are copied -## to weird locations if the user's configuration options -## were entered during the wrong phase of the moon :) . -from distutils.command.install_data import install_data -class smart_install_data(install_data): - def run(self): - #need to change self.install_dir to the library dir - install_cmd = self.get_finalized_command('install') - self.install_dir = getattr(install_cmd, 'install_lib') - return install_data.run(self) -############## - -packages = packagesFor( ".", 'simpleparse' ) -packages.update( {'simpleparse':'.'} ) - -dataFiles = ( - # XXX ick this is messy! 
- npFilesFor( 'doc','simpleparse' ) + - npFilesFor( 'stt','simpleparse' ) -) + """Find all packages in filename""" + set = {} + for item in os.listdir(filename): + dir = os.path.join(filename, item) + if item.lower() != 'cvs' and isPackage( dir ): + if basePackage: + moduleName = basePackage+'.'+item + else: + moduleName = item + set[ moduleName] = dir + set.update( packagesFor( dir, moduleName)) + return set + +packages = packagesFor( "simpleparse", 'simpleparse' ) +packages.update( {'simpleparse':'simpleparse'} ) + +options = { + 'sdist': { 'force_manifest':1,'formats':['gztar','zip'] }, +} +if sys.platform == 'win32': + options.setdefault( + 'build_ext',{} + )['define'] = 'BAD_STATIC_FORWARD' + +def abs_rel( path ): + return os.path.normpath( os.path.abspath(path)) + if __name__ == "__main__": - from sys import hexversion - if hexversion >= 0x2030000: - # work around distutils complaints under Python 2.2.x - extraArguments = { - 'classifiers': [ - """Programming Language :: Python""", - """Topic :: Software Development :: Libraries :: Python Modules""", - """Intended Audience :: Developers""", - ], - 'keywords': 'parse,parser,parsing,text,ebnf,grammar,generator', - 'long_description' : """A Parser Generator for Python (w/mxTextTools derivative) + from sys import hexversion + if hexversion >= 0x2030000: + # work around distutils complaints under Python 2.2.x + extraArguments = { + 'classifiers': [ + """Programming Language :: Python""", + """Topic :: Software Development :: Libraries :: Python Modules""", + """Intended Audience :: Developers""", + ], + 'keywords': 'parse,parser,parsing,text,ebnf,grammar,generator', + 'long_description' : """A Parser Generator for Python (w/mxTextTools derivative) Provides a moderately fast parser generator for use with Python, includes a forked version of the mxTextTools text-processing library @@ -88,36 +69,41 @@ undesirable behaviours. Converts EBNF grammars directly to single-pass parsers for many -largely deterministic grammars. -""", - 'platforms': ['Any'], - } - else: - extraArguments = { - } - setup ( - name = "SimpleParse", - version = "2.1.0a1", - description = "A Parser Generator for Python (w/mxTextTools derivative)", - author = "Mike C. Fletcher", - author_email = "mcfletch@users.sourceforge.net", - url = "http://simpleparse.sourceforge.net/", - - package_dir = packages, - - packages = packages.keys(), - data_files = dataFiles, - cmdclass = {'install_data':smart_install_data}, - ext_modules=[ - Extension( - "simpleparse.stt.TextTools.mxTextTools.mxTextTools", - [ - 'stt/TextTools/mxTextTools/mxTextTools.c', - 'stt/TextTools/mxTextTools/mxte.c', - 'stt/TextTools/mxTextTools/mxbmse.c', - ], - include_dirs=['stt/TextTools/mxTextTools'] - ), - ], - **extraArguments - ) +largely deterministic grammars.""", + 'platforms': ['Any'], + } + else: + extraArguments = { + } + setup ( + name = "SimpleParse", + version = findVersion(), + description = "A Parser Generator for Python (w/mxTextTools derivative)", + author = "Mike C. 
Fletcher", + author_email = "mcfletch@users.sourceforge.net", + url = "http://simpleparse.sourceforge.net/", + + package_dir = packages, + options = options, + + packages = list(packages.keys()), + ext_modules=[ + Extension( + "simpleparse.stt.TextTools.mxTextTools.mxTextTools", + [ + abs_rel(f) for f in + [ + 'simpleparse/stt/TextTools/mxTextTools/mxTextTools.c', + 'simpleparse/stt/TextTools/mxTextTools/mxte.c', + 'simpleparse/stt/TextTools/mxTextTools/mxbmse.c', + ] + ], + include_dirs=[ + abs_rel('simpleparse/stt/TextTools/mxTextTools'), + ], + define_macros=[ ('MX_BUILDING_MXTEXTTOOLS',1), + ('PY_SSIZE_T_CLEAN',1),], + ), + ], + **extraArguments + ) diff -Nru simpleparse-2.1.0a1/simpleparse/baseparser.py simpleparse-2.2.0/simpleparse/baseparser.py --- simpleparse-2.1.0a1/simpleparse/baseparser.py 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/baseparser.py 2015-11-11 18:42:23.000000000 +0000 @@ -0,0 +1,66 @@ +"""Base class for real-world parsers (such as parser.Parser)""" +from simpleparse.stt.TextTools.TextTools import * +from simpleparse.generator import Generator + +class BaseParser: + """Class on which real-world parsers build + + Normally you use a sub-class of this class, such as + simpleparser.parser.Parser + """ + _rootProduction = "" + # primary API... + def parse( self, data, production=None, processor=None, start=0, stop=None): + """Parse data with production "production" of this parser + + data -- data to be parsed, a Python string, for now + production -- optional string specifying a non-default production to use + for parsing data + processor -- optional pointer to a Processor or MethodSource object for + use in determining reporting format and/or post-processing the results + of the parsing pass. Can be None if neither is desired (default) + start -- starting index for the parsing, default 0 + stop -- stoping index for the parsing, default len(data) + """ + self.resetBeforeParse() + if processor is None: + processor = self.buildProcessor() + if stop is None: + stop = len(data) + value = tag( data, self.buildTagger( production, processor), start, stop ) + if processor and callable(processor): + return processor( value, data ) + else: + return value + # abstract methods + def buildProcessor( self ): + """Build default processor object for this parser class + + The default implementation returns None. The processor + can either implement the "method source" API (just provides + information about Callouts and the like), or the processor + API and the method-source API. The processor API merely + requires that the object be callable, and have the signature: + + object( (success, children, nextPosition), buffer) + + (Note: your object can treat the first item as a single tuple + if it likes). + + See: simpleparse.processor module for details. + """ + return None + def buildTagger( self, name, processor ): + """Build the tag-table for the parser + + This method must be implemented by your base class and _not_ + call the implementation here. + """ + raise NotImplementedError( """Parser sub-class %s hasn't implemented a buildTagger method"""%(self.__class__.__name__)) + def resetBeforeParse( self ): + """Called just before the parser's parse method starts working, + + Allows you to set up special-purpose structures, such as stacks + or local storage values. There is no base implementation. The + base implementation does nothing. 
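A minimal sketch of that hook (subclass name invented for illustration):

    from simpleparse.parser import Parser

    class StackingParser(Parser):
        """Illustrative subclass using resetBeforeParse for per-run state"""
        def resetBeforeParse(self):
            # give every parse() call a fresh scratch stack
            self.stack = []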
+ """ \ No newline at end of file diff -Nru simpleparse-2.1.0a1/simpleparse/common/calendar_names.py simpleparse-2.2.0/simpleparse/common/calendar_names.py --- simpleparse-2.1.0a1/simpleparse/common/calendar_names.py 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/common/calendar_names.py 2015-11-11 18:42:23.000000000 +0000 @@ -0,0 +1,103 @@ +"""Locale-specific calendar names (day-of-week and month-of-year) + +These values are those returned by the calendar module. Available +productions: + + locale_day_names + locale_day_names_uc + locale_day_names_lc + Names for the days of the week + + locale_day_abbrs + locale_day_abbrs_uc + locale_day_abbrs_lc + Short-forms (3 characters normally) for + the days of the week. + + locale_month_names + locale_month_names_uc + locale_month_names_lc + Names for the months of the year + + locale_month_abbrs + locale_month_abbrs_uc + locale_month_abbrs_lc + Short-forms (3 characters normally) for + the months of the year + +Interpreters: + MonthNameInterpreter + DayNameInterpreter + Both offer the ability to set an index other + than the default (of 1) for the first item in + the list. +""" +import calendar +from simpleparse import objectgenerator, common + +c = {} + +da = calendar.day_abbr[:] +dn = calendar.day_name[:] +ma = calendar.month_abbr[:] +mn = calendar.month_name[:] + +def _build( name, set ): + # make sure longest equal-prefix items are first + set = set[:] + set.sort() + set.reverse() + l,u,r = [],[],[] + for item in set: + l.append( objectgenerator.Literal( value = item.lower() )) + u.append( objectgenerator.Literal( value = item.upper() )) + r.append( objectgenerator.Literal( value = item )) + c[ name + '_lc' ] = objectgenerator.FirstOfGroup( children = l ) + c[ name + '_uc' ] = objectgenerator.FirstOfGroup( children = u ) + c[ name ] = objectgenerator.FirstOfGroup( children = r ) + +_build( 'locale_day_names', dn ) +_build( 'locale_day_abbrs', da ) + + +_build( 'locale_month_names', mn ) +_build( 'locale_month_abbrs', ma ) + +da = [s.lower() for s in da] +dn = [s.lower() for s in dn] +ma = [s.lower() for s in ma] +mn = [s.lower() for s in mn] + + +common.share( c ) + +class NameInterpreter: + offset = 1 + def __init__( self, offset = 1 ): + self.offset = offset + def __call__( self, info, buffer ): + (tag, left, right, children) = info + value = buffer[left:right].lower() + for table in self.tables: + try: + return table.index( value )+ self.offset + except ValueError: + pass + raise ValueError( """Unrecognised (but parsed) %s name %s at character %s"""%( self.nameType, value, left)) + +class MonthNameInterpreter( NameInterpreter): + """Interpret a month-of-year name as an integer index + + Pass an "offset" value to __init__ to use an offset other + than 1 (Monday = 1), normally 0 (Monday = 0) + """ + nameType = "Month" + tables = (mn,ma) +class DayNameInterpreter( NameInterpreter ): + """Interpret a day-of-week name as an integer index + + Pass an "offset" value to __init__ to use an offset other + than 1 (January = 1), normally 0 (January = 0) + """ + nameType = "Day" + tables = (dn,da) diff -Nru simpleparse-2.1.0a1/simpleparse/common/chartypes.py simpleparse-2.2.0/simpleparse/common/chartypes.py --- simpleparse-2.1.0a1/simpleparse/common/chartypes.py 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/common/chartypes.py 2015-11-11 18:42:23.000000000 +0000 @@ -0,0 +1,83 @@ +"""Common locale-specific character types + +Following productions are all based on string module, +with the default locale 
specified. The first production +is a single character of the class and the second a +repeating character version: + + digit, digits + uppercasechar, uppercase + lowercasechar, lowercase + letter, letters + whitespacechar, whitespace + punctuationchar, punctuation + octdigit, octdigits + hexdigit, hexdigits + printablechar, printable + +For Python versions with the constants in the string module: + ascii_letter, ascii_letters + ascii_lowercasechar, ascii_lowercase + ascii_uppercasechar, ascii_uppercase + + +Following are locale-specific values, both are +single-character values: + + locale_decimal_point -- locale-specific decimal seperator + locale_thousands_seperator -- locale-specific "thousands" seperator + +Others: + + EOF -- Matches iff parsing has reached the end of the buffer + +There are no interpreters provided (the types are considered +too common to provide meaningful interpreters). +""" +from simpleparse import objectgenerator, common +import string, locale +locale.setlocale(locale.LC_ALL, "" ) + +c = {} + +# string-module items... + +for source,single,repeat in [ + ("digits","digit","digits"), + ("ascii_uppercase", "uppercasechar", "uppercase"), + ("ascii_lowercase", "lowercasechar", "lowercase"), + ("ascii_letters", "letter", "letters" ), + ("ascii_letters", "ascii_letter", "ascii_letters" ), # alias + ("ascii_lowercase", "ascii_lowercasechar", "ascii_lowercase"), + ("ascii_uppercase", "ascii_uppercasechar", "ascii_uppercase"), + ("whitespace", "whitespacechar", "whitespace"), + ("punctuation", "punctuationchar", "punctuation"), + ("octdigits", "octdigit", "octdigits"), + ("hexdigits", "hexdigit", "hexdigits"), + ("printable", "printablechar", "printable"), +]: + try: + value = getattr( string, source ) + c[ single ] = objectgenerator.Range( value = value ) + c[ repeat ] = objectgenerator.Range( value = value, repeating =1 ) + except AttributeError: + pass + +# locale-module items +_lc = locale.localeconv() +c[ "locale_decimal_point" ] = objectgenerator.Literal( value = _lc["decimal_point"] ) +c[ "locale_thousands_seperator" ] = objectgenerator.Literal( value = _lc["thousands_sep"] ) + +del _lc + +# common, but not really well defined sets +# this is the set of characters which are interpreted +# specially by Python's string-escaping when they +# follow a \\ char. + +from simpleparse.stt import TextTools +c[ "EOF" ] = objectgenerator.Prebuilt( value = ( + (None, TextTools.EOF, TextTools.Here), +) ) + +common.share( c ) diff -Nru simpleparse-2.1.0a1/simpleparse/common/comments.py simpleparse-2.2.0/simpleparse/common/comments.py --- simpleparse-2.1.0a1/simpleparse/common/comments.py 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/common/comments.py 2015-11-11 18:42:23.000000000 +0000 @@ -0,0 +1,72 @@ +"""Common comment formats + +To process, handle the "comment" production, +(the specific named comment formats are all +expanded productions, so you won't get them +returned for processing). 
+ + hash_comment + # to EOL comments + slashslash_comment + // to EOL comments + semicolon_comment + ; to EOL comments + slashbang_comment + c_comment + non-nesting /* */ comments + slashbang_nest_comment + c_nest_comment + nesting /* /* */ */ comments +""" +from simpleparse.parser import Parser +from simpleparse import common, objectgenerator +from simpleparse.common import chartypes + +c = {} + +eolcomments = r""" +### comment formats where the comment goes +### from a marker to the end of the line + +comment := -'\012'* + := ('\r'?,'\n')/EOF + +>hash_comment< := '#', comment, EOL +>semicolon_comment< := ';', comment, EOL +>slashslash_comment< := '//', comment, EOL +""" + +_p = Parser( eolcomments ) +for name in ["hash_comment", "semicolon_comment", "slashslash_comment"]: + c[ name ] = objectgenerator.LibraryElement( + generator = _p._generator, + production = name, + ) + +ccomments = r""" +### comments in format /* comment */ with no recursion allowed +comment := -"*/"* +>slashbang_comment< := '/*', comment, '*/' +""" +_p = Parser( ccomments ) +for name in ["c_comment","slashbang_comment"]: + c[ name ] = objectgenerator.LibraryElement( + generator = _p._generator, + production = "slashbang_comment", + ) + +nccomments = r""" +### nestable C comments of form /* comment /* innercomment */ back to previous */ + := '/*' + := '*/' +comment := (-(comment_stop/comment_start)+/slashbang_nest_comment)* +>slashbang_nest_comment< := comment_start, comment, comment_stop +""" +_p = Parser( nccomments ) +for name in ["c_nest_comment","slashbang_nest_comment"]: + c[ name ] = objectgenerator.LibraryElement( + generator = _p._generator, + production = "slashbang_nest_comment", + ) + +common.share(c) diff -Nru simpleparse-2.1.0a1/simpleparse/common/__init__.py simpleparse-2.2.0/simpleparse/common/__init__.py --- simpleparse-2.1.0a1/simpleparse/common/__init__.py 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/common/__init__.py 2015-11-11 18:42:23.000000000 +0000 @@ -0,0 +1,19 @@ +"""Common (library) definitions + +You normally use this module by importing one of our +sub-modules (which automatically registers itself with +the SOURCES list defined here). + +Calling common.share( dictionary ) with a dictionary +mapping string names to element token instances will +make the element tokens available under those string +names in default parsers. Note: a Parser can override +this by specifying an explicit definitionSources +parameter in its initialiser. 
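A minimal sketch of the registration flow (the 'dash' token and the grammar are invented for illustration; hash_comment is registered by simpleparse.common.comments as shown above):

    from simpleparse.parser import Parser
    from simpleparse import common, objectgenerator
    import simpleparse.common.comments   # importing the module calls common.share(...)

    # a module (or application) can publish its own element tokens the same way
    common.share({'dash': objectgenerator.Literal(value='-')})

    # shared productions can then be referenced by name in any default Parser
    parser = Parser(r'''
    line := item, (dash, item)*, hash_comment?
    item := [a-zA-Z0-9]+
    ''', 'line')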
+""" + +def share( dictionary ): + SOURCES.append( dictionary) + +SOURCES = [ +] \ No newline at end of file diff -Nru simpleparse-2.1.0a1/simpleparse/common/iso_date_loose.py simpleparse-2.2.0/simpleparse/common/iso_date_loose.py --- simpleparse-2.1.0a1/simpleparse/common/iso_date_loose.py 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/common/iso_date_loose.py 2015-11-11 18:42:23.000000000 +0000 @@ -0,0 +1,141 @@ +"""Somewhat Looser ISO date format YYYY-MM-DD HH:mm:SS +HH:mm + + ISO_date_loose -- YYYY-MM-DD format, with a month and day optional, + month or day may be specified without leading 0 + ISO_time_loose -- HH:mm:SS format, with minutes and seconds optional + all numbers may be specified without leading 0 + ISO_date_time_loose -- YYYY-MM-DD HH:mm:SS +HH:mm format, + with time optional and TimeZone offset optional, + same format for date and time as above + +Interpreter: + MxInterpreter + Interprets the parse tree as mx.DateTime values + Date and DateTime -> DateTime objects + Time only -> RelativeDateTime +""" +try: + from mx import DateTime + haveMX = 1 +except ImportError: + haveMX = 0 +from simpleparse.parser import Parser +from simpleparse import common, objectgenerator +from simpleparse.common import chartypes, numbers +from simpleparse.dispatchprocessor import * + +c = {} +declaration = """ + := [-] + := ':' +offset_sign := [-+] + +year := int +month := int +day := int +hour := int +minute := int +second := float/int +ISO_date_loose := year, (date_separator, month, (date_separator, day)?)? +ISO_time_loose := hour, (time_separator, minute, (time_separator, second)?)? +offset := offset_sign, offset_hour, time_separator?, offset_minute? +offset_hour := digit, digit +offset_minute := digit, digit + +ISO_date_time_loose := ISO_date_loose, ([T ], ISO_time_loose)?, [ ]?, offset? +""" + +_p = Parser( declaration ) +for name in ["ISO_time_loose","ISO_date_time_loose", "ISO_date_loose"]: + c[ name ] = objectgenerator.LibraryElement( + generator = _p._generator, + production = name, + ) +common.share( c ) + +if haveMX: + class MxInterpreter(DispatchProcessor): + """Interpret a parsed ISO_date_time_loose in GMT/UTC time or localtime + """ + int = numbers.IntInterpreter() + offset_minute = offset_hour = year = month = day = hour = minute = int + + float = numbers.FloatInterpreter() + second = float + + def __init__( + self, + inputLocal = 1, + returnLocal = 1, + ): + self.inputLocal = inputLocal + self.returnLocal = returnLocal + dateName = 'ISO_date_loose' + timeName = 'ISO_time_loose' + def ISO_date_time_loose( self, info, buffer): + """Interpret the loose ISO date + time format""" + (tag, left, right, sublist) = info + set = singleMap( sublist, self, buffer ) + base, time, offset = ( + set.get(self.dateName), + set.get(self.timeName) or DateTime.RelativeDateTime(hour=0,minute=0,second=0), + set.get( "offset" ), + ) + base = base + time + offset = set.get( "offset" ) + if offset is not None: + # an explicit timezone was entered, convert to gmt and return as appropriate... + gmt = base - offset + if self.returnLocal: + return gmt.localtime() + else: + return gmt + # was in the default input locale (either gmt or local) + if self.inputLocal and self.returnLocal: + return base + elif not self.inputLocal and not self.returnLocal: + return base + elif self.inputLocal and not self.returnLocal: + # return gmt from local... 
+ return base.gmtime() + else: + return base.localtime() + def ISO_date_loose( self, info, buffer): + """Interpret the loose ISO date format""" + (tag, left, right, sublist) = info + set = singleMap( sublist, self, buffer ) + return DateTime.DateTime( + set.get("year") or now().year, + set.get("month") or 1, + set.get("day") or 1, + ) + def ISO_time_loose( self, info, buffer): + """Interpret the loose ISO time format""" + (tag, left, right, sublist) = info + set = singleMap( sublist, self, buffer ) + return DateTime.RelativeDateTime( + hour = set.get("hour") or 0, + minute = set.get("minute") or 0, + second = set.get("second") or 0, + ) + + + def offset( self, info, buffer): + """Calculate the time zone offset as a date-time delta""" + (tag, left, right, sublist) = info + set = singleMap( sublist, self, buffer ) + direction = set.get('offset_sign',1) + hour = set.get( "offset_hour", 0) + minute = set.get( "offset_minute", 0) + delta = DateTime.DateTimeDelta( 0, hour*direction, minute*direction) + return delta + + def offset_sign( self , info, buffer): + """Interpret the offset sign as a multiplier""" + (tag, left, right, sublist) = info + v = buffer [left: right] + if v in ' +': + return 1 + else: + return -1 + diff -Nru simpleparse-2.1.0a1/simpleparse/common/iso_date.py simpleparse-2.2.0/simpleparse/common/iso_date.py --- simpleparse-2.1.0a1/simpleparse/common/iso_date.py 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/common/iso_date.py 2015-11-11 18:42:23.000000000 +0000 @@ -0,0 +1,153 @@ +"""Canonical ISO date format YYYY-MM-DDTHH:mm:SS+HH:mm + +This parser is _extremely_ strict, and the dates that match it, +though really easy to work with for the computer, are not particularly +readable. See the iso_date_loose module for a slightly relaxed +definition which allows the "T" character to be replaced by a +" " character, and allows a space before the timezone offset, as well +as allowing the integer values to use non-0-padded integers. + + + ISO_date -- YYYY-MM-DD format, with a month and date optional + ISO_time -- HH:mm:SS format, with minutes and seconds optional + ISO_date_time -- YYYY-MM-DD HH:mm:SS+HH:mm format, + with time optional and TimeZone offset optional + +Interpreter: + MxInterpreter + Interprets the parse tree as mx.DateTime values + ISO_date and ISO_time + returns DateTime objects + Time only + returns RelativeDateTime object which, when + added to a DateTime gives you the given time + within that day +""" +try: + from mx import DateTime + haveMX = 1 +except ImportError: + haveMX = 0 +from simpleparse.parser import Parser +from simpleparse import common, objectgenerator +from simpleparse.common import chartypes, numbers +from simpleparse.dispatchprocessor import * + +c = {} + +declaration =""" +year := digit,digit,digit,digit +month := digit,digit +day := digit,digit + +hour := digit,digit +minute := digit,digit +second := digit,digit +offset_sign := [-+] +offset := offset_sign, hour, time_separator?, minute + + := '-' + := ':' + +ISO_date := year, (date_separator, month, (date_separator, day)?)? +ISO_time := hour, (time_separator, minute, (time_separator, second)?)? +ISO_date_time := ISO_date, ([T], ISO_time)?, offset? 
+""" + + + + +_p = Parser( declaration ) +for name in ["ISO_time","ISO_date", "ISO_date_time"]: + c[ name ] = objectgenerator.LibraryElement( + generator = _p._generator, + production = name, + ) +common.share( c ) + +if haveMX: + class MxInterpreter(DispatchProcessor): + """Interpret a parsed ISO_date_time_loose in GMT/UTC time or localtime + """ + def __init__( + self, + inputLocal = 1, + returnLocal = 1, + ): + self.inputLocal = inputLocal + self.returnLocal = returnLocal + dateName = 'ISO_date' + timeName = 'ISO_time' + def ISO_date_time( self, info, buffer): + """Interpret the loose ISO date + time format""" + (tag, left, right, sublist) = info + set = singleMap( sublist, self, buffer ) + base, time, offset = ( + set.get(self.dateName), + set.get(self.timeName) or DateTime.RelativeDateTime(hour=0,minute=0,second=0), + set.get( "offset" ), + ) + base = base + time + offset = set.get( "offset" ) + if offset is not None: + # an explicit timezone was entered, convert to gmt and return as appropriate... + gmt = base - offset + if self.returnLocal: + return gmt.localtime() + else: + return gmt + # was in the default input locale (either gmt or local) + if self.inputLocal and self.returnLocal: + return base + elif not self.inputLocal and not self.returnLocal: + return base + elif self.inputLocal and not self.returnLocal: + # return gmt from local... + return base.gmtime() + else: + return base.localtime() + def ISO_date( self, info, buffer): + """Interpret the ISO date format""" + (tag, left, right, sublist) = info + set = {} + for item in sublist: + set[ item[0] ] = dispatch( self, item, buffer) + return DateTime.DateTime( + set.get("year") or now().year, + set.get("month") or 1, + set.get("day") or 1, + ) + def ISO_time( self, info, buffer): + """Interpret the ISO time format""" + (tag, left, right, sublist) = info + set = {} + for item in sublist: + set[ item[0] ] = dispatch( self, item, buffer) + return DateTime.RelativeDateTime( + hour = set.get("hour") or 0, + minute = set.get("minute") or 0, + second = set.get("second") or 0, + ) + + integer = numbers.IntInterpreter() + second = offset_minute = offset_hour = year = month = day = hour =minute =integer + + def offset( self, info, buffer): + """Calculate the time zone offset as a date-time delta""" + (tag, left, right, sublist) = info + set = singleMap( sublist, self, buffer ) + direction = set.get('offset_sign',1) + hour = set.get( "hour", 0) + minute = set.get( "minute", 0) + delta = DateTime.DateTimeDelta( 0, hour*direction, minute*direction) + return delta + + def offset_sign( self , info, buffer): + """Interpret the offset sign as a multiplier""" + (tag, left, right, sublist) = info + v = buffer [left: right] + if v in ' +': + return 1 + else: + return -1 + diff -Nru simpleparse-2.1.0a1/simpleparse/common/numbers.py simpleparse-2.2.0/simpleparse/common/numbers.py --- simpleparse-2.1.0a1/simpleparse/common/numbers.py 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/common/numbers.py 2015-11-11 18:42:23.000000000 +0000 @@ -0,0 +1,190 @@ +"""Samples showing the parsing of common programming-language constructs + +numbers + integers + int + int_unsigned + + hexidecimal integers + hex + + floats (including exponents, requring a '.' 
in the literal) + float + floats, with optional integer-only exponents + float_floatexp + floats, with optional integer or float exponents + + imaginary_number + (float/int),[jJ] + + number + hex/float/int + number_full + binary_number/imaginary_number/hex/float/int + + binary_number + signed binary number + 1001001b or 1001001B bit-field format, + optional sign + can be used with number as (binary_number/number) + +Interpreters: + + IntInterpreter + int, int_unsigned + HexInterpreter + hex + FloatInterpreter + float + FloatFloatExpInterpreter + float_floatexp + BinaryInterpreter + binary_number + ImaginaryInterpreter + imaginary_number + +""" +from simpleparse.parser import Parser +from simpleparse import common, objectgenerator +from simpleparse.common import chartypes +from simpleparse.dispatchprocessor import * + +c = {} + +declaration = r""" +# sample for parsing integer and float numbers +# including hexidecimal numbers in 0xFFF format +sign := [-+]+ + + := digits + := hexdigits + +decimal_fraction := '.',int_unsigned? + +# float which is explicitly a float, cannot be an integer +# because it includes a decimal point +explicit_base := sign?, ((int_unsigned, decimal_fraction) / decimal_fraction / (int_unsigned,'.')) + +exponent := int +exponent_loose := explicit_base/int + +float := explicit_base, ([eE],exponent)? +float_floatexp := explicit_base, ([eE],exponent_loose)? + +hex := sign?, '0', [xX], hexdigits +int_unsigned := l_digits +int := sign?, l_digits +binary_digits := [01]+ +binary_number := sign?, binary_digits,('b'/'B') + +imaginary_number := (float/int), [jJ] + +##number := binary_number/hex/float/int +number := hex/float/int +number_full := binary_number/imaginary_number/hex/float/int +""" + +_p = Parser( declaration ) +for name in ["int","hex", "int_unsigned", "number", "float", "binary_number", "float_floatexp", "imaginary_number", "number_full"]: + c[ name ] = objectgenerator.LibraryElement( + generator = _p._generator, + production = name, + ) + +if __name__ == "__main__": + test() + +common.share( c ) + +def _toInt( s, base ): + try: + return int( s, base) + except TypeError: + return int( s, base) +def _toLong( s, base ): + return int( s, base) + +class IntInterpreter(DispatchProcessor): + """Interpret an integer (or unsigned integer) string as an integer""" + def __call__( self, info, buffer): + (tag, left, right, children) = info + try: + return _toInt( buffer[left:right], 10) + except ValueError: + return _toLong( buffer[left:right], 10) +class HexInterpreter(DispatchProcessor): + """Interpret a hexidecimal integer string as an integer value""" + def __call__( self, info, buffer): + (tag, left, right, children) = info + try: + return _toInt( buffer[left:right], 16) + except ValueError: + return _toLong( buffer[left:right], 16) + +class FloatFloatExpInterpreter(DispatchProcessor): + """Interpret a float string as an integer value + Note: we're allowing float exponentiation, which + gives you a nice way to write 2e.5 + """ + def __call__( self, info, buffer): + (tag, left, right, children) = info + tag, l, r, _ = children[0] + base = float( buffer[l:r] ) + if len(children) > 1: + # figure out the exponent... 
+ exp = children[1] + exp = buffer[ exp[1]:exp[2]] +## import pdb +## pdb.set_trace() + exp = float( exp ) + + base = base * (10** exp) + return base +class FloatInterpreter(DispatchProcessor): + """Interpret a standard float value as a float""" + def __call__( self, info, buffer): + (tag, left, right, children) = info + return float( buffer[left:right]) + +import sys +if hasattr( sys,'version_info') and sys.version_info[:2] > (2,0): + class BinaryInterpreter(DispatchProcessor): + def __call__( self, info, buffer): + """Interpret a bitfield set as an integer""" + (tag, left, right, children) = info + return _toInt( buffer[left:right-1], 2) +else: + class BinaryInterpreter(DispatchProcessor): + def __call__( self, info, buffer): + """Interpret a bitfield set as an integer, not sure this algo + is correct, will see I suppose""" + (tag, left, right, children) = info + sign = 1 + if len(children) > 2: + s = children[0] + for schar in buffer[s[1]:s[2]]: + if schar == '-': + sign = sign * -1 + bits = buffer[children[1][1]:children[1][2]] + else: + bits = buffer[children[0][1]:children[0][2]] + value = 0 + for bit in bits: + value = (value << 1) + if bit == '1': + value = value + 1 + return value + +class ImaginaryInterpreter( DispatchProcessor ): + map = { + "float":FloatInterpreter(), + "int":IntInterpreter() + } + def __call__( self, info, buffer): + """Interpret a bitfield set as an integer, not sure this algo + is correct, will see I suppose""" + (tag, left, right, children) = info + base = children[0] + base = self.mapSet[base[0]](base, buffer) + return base * 1j + diff -Nru simpleparse-2.1.0a1/simpleparse/common/phonetics.py simpleparse-2.2.0/simpleparse/common/phonetics.py --- simpleparse-2.1.0a1/simpleparse/common/phonetics.py 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/common/phonetics.py 2015-11-11 18:42:23.000000000 +0000 @@ -0,0 +1,76 @@ +"""Phonetic spellings for character values + +At the moment, only contains the "military alphabet" +(Alpha, Bravo ... Yankee, Zulu), which is used as +alternative timezone names by the military and apparently +some aviation groups. Note, these are fairly common spellings, +but they aren't necessarily going to match a particular +usage. I may have missed some of the possibilities... + + military_alphabet_char -- fully spelled out versions of + the Alpha, Bravo ... Yankee, Zulu phonetic alphabet, + including a few minor variations in spelling such as + Xray and X-ray. All characters use title-caps format, + so Zulu, not zulu will match. + military_alphabet_char_lower -- as for above, but with + lowercased versions of the above + +No interpreters are provided. Taking the first character of +the name will always give you the equivalent character uppercase +for the military_alphabet_char and lowercase for the +military_alphabet_char_lower. +""" +from simpleparse import objectgenerator, common + +c = {} + +# note that Juliette comes before Juliet, because +# otherwise Juliette could never match in an FOGroup! 
+_letters = """Alpha +Bravo +Charlie +Delta +Echo Echo +Foxtrot +Golf Gulf +Hotel +India +Juliette Juliet +Kilo +Lima +Mike +November +Oscar +Papa +Quebec +Romeo +Sierra +Tango +Uniform +Victor +Whiskey +Xray X-ray +Yankee +Zulu""".split() + +set1,set2 = [], [] +for item in _letters: + set1.append( + objectgenerator.Literal( value=item) + ) + set2.append( + objectgenerator.Literal( value=item.lower()) + ) + +military_alphabet_char = objectgenerator.FirstOfGroup( + children = set1 +) +military_alphabet_char_lower = objectgenerator.FirstOfGroup( + children = set2 +) +del set1, set2 + +c[ "military_alphabet_char" ] = military_alphabet_char +c[ "military_alphabet_char_lower" ] = military_alphabet_char_lower + +common.share( c ) diff -Nru simpleparse-2.1.0a1/simpleparse/common/strings.py simpleparse-2.2.0/simpleparse/common/strings.py --- simpleparse-2.1.0a1/simpleparse/common/strings.py 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/common/strings.py 2015-11-11 18:42:23.000000000 +0000 @@ -0,0 +1,168 @@ +"""Python string parsers with escape characters + +Python-string-like operation as much as possible, this includes: + support for single and double-quoted strings + support for triple-quoted versions of the same + support for special character escapes as seen in 8-bit python strings + support for octal and hexidecimal character escapes + + + string_single_quote + string_double_quote + string_triple_single + string_triple_double + Individual string types with the above features + + string + Any of the above string types, in a simple FirstOf group + with the triple-quoted types first, then the single quoted + i.e. generated with this grammar: + + string_triple_double/string_triple_single/string_double_quote/string_single_quote + + +Interpreters: + StringInterpreter + Interprets any/all of the above as a normal (non-Raw) Python + regular (non-unicode) string. Hopefully the action is identical + to doing eval( matchedString, {},{}), without the negative security + implications of that approach. Note that you need to make the + interpreter available under each name you use directly in your + grammar, so if you use string_single_quote and string_double_quote + directly, then you need to add: + string_single_quote = myStringInterpreterInstance + string_double_quote = myStringInterpreterInstance + to your processor class. +""" + +from simpleparse.parser import Parser +from simpleparse import common, objectgenerator +from simpleparse.common import chartypes +assert chartypes +from simpleparse.dispatchprocessor import * + +c = {} + +stringDeclaration = r""" +# note that non-delimiter can never be hit by non-triple strings +str := delimiter, (char_no_quote/escaped_char/backslash_char/nondelimiter)*,delimiter + +escaped_char := '\\',( string_special_escapes / ('x',hex_escaped_char) / octal_escaped_char ) +octal_escaped_char := octdigit, octdigit?, octdigit? +hex_escaped_char := hexdigit,hexdigit + +backslash_char := "\\" # i.e. 
a backslash preceding a non-special char + +""" + +_stringTypeData = [ + ("string_double_quote", """ + := '"' +nondelimiter := -'"' +char_no_quote := -[\\\\"]+ +string_special_escapes := [\\\\abfnrtv"] +"""), + ("string_single_quote", """ + := "'" +nondelimiter := -"'" +char_no_quote := -[\\\\']+ +string_special_escapes := [\\\\abfnrtv'] +"""), + ("string_triple_single", """ +nondelimiter := -"'''" + := "'''" +char_no_quote := -[\\\\']+ +string_special_escapes := [\\\\abfnrtv'] +"""), + ("string_triple_double",''' +nondelimiter := -'"""' + := '"""' +char_no_quote := -[\\\\"]+ +string_special_escapes := [\\\\abfnrtv"] +'''), +] + +for name, partial in _stringTypeData: + _p = Parser( stringDeclaration + partial ) + c[ name ] = objectgenerator.LibraryElement( + generator = _p._generator, + production = "str", + ) +common.share( c ) +_p = Parser( """ +string := string_triple_double/string_triple_single/string_double_quote/string_single_quote +""" ) +c[ "string"] = objectgenerator.LibraryElement( + generator = _p._generator, + production = "string", +) + +class StringInterpreter(DispatchProcessor): + """Processor for converting parsed string values to their "intended" value + + Basically this processor handles de-escaping and stripping the + surrounding quotes, so that you get the string as a Python string + value. You use the processor by creating an instance of + StringInterpreter() as an item in another processor's + methodSource object (often the Parser itself). + + For example: + + class MyProcessor( DispatchProcessor ): + string = StringInterpreter() + + # following would be used if you have, for instance, + # used string_single_quote in an area where double + # or triple-quoted strings are not allowed, but have + # used string in another area. + string_single_quote = string + """ + def string( self, info, buffer): + """Dispatch any of the string types and return the result""" + (tag, left, right, sublist) = info + return dispatch( self, sublist[0], buffer ) + + def string_single_quote( self, info, buffer): + (tag, left, right, sublist) = info + return "".join(dispatchList(self, sublist, buffer)) + string_double_quote = string_single_quote + string_triple_single = string_single_quote + string_triple_double = string_single_quote + + def char_no_quote( self, info, buffer): + (tag, left, right, sublist) = info + return buffer[left:right] + nondelimiter = char_no_quote + + def escaped_char( self, info, buffer): + (tag, left, right, sublist) = info + return "".join(dispatchList(self,sublist,buffer)) + + def octal_escaped_char(self, info, buffer): + (tag, left, right, sublist) = info + return chr(int( buffer[left:right], 8 )) + def hex_escaped_char( self, info, buffer): + (tag, left, right, sublist) = info + return chr(int( buffer[left:right], 16 )) + + def backslash_char( self, info, buffer): + return "\\" + + def string_special_escapes( self, info, buffer): + """Maps "special" escapes to the corresponding characters""" + (tag, left, right, sublist) = info + return self.specialescapedmap[ buffer[left:right]] + specialescapedmap = { + 'a':'\a', + 'b':'\b', + 'f':'\f', + 'n':'\n', + 'r':'\r', + 't':'\t', + 'v':'\v', + '\\':'\\', + '\n':'', + '"':'"', + "'":"'", + } + diff -Nru simpleparse-2.1.0a1/simpleparse/common/timezone_names.py simpleparse-2.2.0/simpleparse/common/timezone_names.py --- simpleparse-2.1.0a1/simpleparse/common/timezone_names.py 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/common/timezone_names.py 2015-11-11 18:42:23.000000000 +0000 @@ -0,0 +1,218 @@ 
+"""Common timezone names (civilian, military and combined) + +These productions are a collection of common civilian and +military timezone names. The list of names is by no means +exhaustive (nor definitive), but it gives most timezones +at least one named value (to make it possible to enter the +name), and it doesn't repeat any names (I hope ;) ). You +have three major classes of names, civilian (EST, PST, GMT, +UTC), military single-character (A,B,C,D,E...) and military +phonetic spelling (Alpha, Bravo... Zulu). The military +variants are combined into a single production, however. + + civilian_timezone_name -- the "familiar" timezones, most + real-world data entry will want to use this as their + "timezone" definition I'm guessing. + + military_timezone_name -- military timezones in the two + formats outlined above. + + timezone_name -- combination of the two above into a + single production. + +Interpreter: + + TimeZoneNameInterpreter -- see below for details, by + default takes the timezone name and converts to + a second offset in West-negative format. Note: + this is the _opposite_ of the time module, but is + the more commonly used format AFAIK. Null matches + will return a default TimeZone as specified. +""" +from simpleparse import objectgenerator, common +from simpleparse.common import phonetics +import time + +c = {} + +timezone_data = [] +civilian_data = [ + # Basically this defines our recognised input locales, + # it is by no means exhaustive, but it gives fairly + # good coverage with minimal overlap + ('NZDT',46800), + ('IDLE',43200), + ('NZST',43200), + ('NZT',43200), + ('AESST',39600), + ('ACSST',37800), + ('CADT',37800), + ('SADT',37800), + ('AEST',36000), + ('EAST',36000), + ('GST',36000), + ('LIGT',36000), + ('ACST',34200), + ('CAST',34200), + ('SAT',34200), + ('AWSST',32400), + ('JST',32400), + ('KST',32400), + ('WDT',32400), + ('MT',30600), + ('AWST',28800), + ('CCT',28800), + ('WADT',28800), + ('WST',28800), + ('JT',27000), + ('WAST',25200), + ('IT',12600), + ('BT',10800), + ('EETDST',10800), + ('MSK', 10800), + ('CETDST',7200), + ('EET',7200), + ('FWT',7200), + ('IST',7200), + ('MEST',7200), + ('METDST',7200), + ('SST',7200), + ('BST',3600), + ('CET',3600), + ('DNT',3600), + ('DST',3600), + ('FST',3600), + ('MET',3600), + ('MEWT',3600), + ('MEZ',3600), + ('NOR',3600), + ('SET',3600), + ('SWT',3600), + ('WETDST',3600), + ('GMT',0), + ('UTC', 0), + ('WET',0), + ('WAT',-3600), + ('NDT',-5400), + ('AT', -7200), + ('ADT',-10800), + ('NFT',-9000), + ('NST',-9000), + ('AST',-14400), + ('EDT',-14400), + ('ZP4',-14400), + ('CDT',-18000), + ('EST',-18000), + ('ZP5',-18000), + ('CST',-21600), + ('MDT',-21600), + ('ZP6',-21600), + ('MST',-25200), + ('PDT',-25200), + ('PST',-28800), + ('YDT',-28800), + ('HDT',-32400), + ('YST',-32400), + ('AKST',-32400), + + ('AHST',-36000), + ('HST',-36000), + ('CAT',-36000), + ('NT',-39600), + ('IDLW',-43200), +] +timezone_data = timezone_data + civilian_data +### add military timezones +##A-I then K-Z are used... +## z = 0 +## a - i, k-m -> + values up to 12 +## n-y - values up to -12 +## what a totally messed up system! +## I've checked with a number of sites, they all seem to think +## it works this way... darned if I can figure out why they don't +## make N -12, o -11 etceteras so that z would come in order and you'd +## have a simple progression around the globe... sigh. 
+zulu_data = [ + ('A', 3600), ('B', 7200), ('C', 10800), ('D', 14400), ('E', 18000), + ('F', 21600), ('G', 25200), ('H', 28800), ('I', 32400), ('K', 36000), + ('L', 39600), ('M', 43200), + ('N', -3600), ('O', -7200), ('P', -10800), ('Q', -14400), ('R', -18000), + ('S', -21600), ('T', -25200), ('U', -28800), ('V', -32400), ('W', -36000), + ('X', -39600), ('Y', -43200), + ('Z', 0), +] +# now add these, plus the expanded versions to the dict above... +# note that we only allow capitalised versions of the military +# zones! +tztemp = [] +for key, value in zulu_data: + for item in phonetics._letters: + if item[0] == key: + tztemp.append( (item, value) ) +# order is important here, want longer first +zulu_data = tztemp + zulu_data +del tztemp +# and call that done for now, folks... +timezone_data = timezone_data + zulu_data +# the rules are really big, but oh well... +def _build( data ): + """Build the name:time map and match rule for each dataset""" + data = data[:] + data.sort() # get shortest and least values first forcefully... + # then reverse that, to get longest first... + data.reverse() + names = [] + mapping = {} + for key,value in data: + names.append( objectgenerator.Literal(value=key)) + mapping[key] = value + rule = objectgenerator.FirstOfGroup( + children = names + ) + return mapping, rule +zulu_mapping, zulu_rule = _build( zulu_data ) +civilian_mapping, civilian_rule = _build( civilian_data ) +timezone_mapping, timezone_rule = _build( timezone_data ) + +c[ "military_timezone_name" ] = zulu_rule +c[ "civilian_timezone_name" ] = civilian_rule +c[ "timezone_name" ] = timezone_rule + +common.share(c) + +import time +if time.daylight: + LOCAL_ZONE = time.altzone +else: + LOCAL_ZONE = time.timezone +# account for time module's different counting procedure... +LOCAL_ZONE = -LOCAL_ZONE + +class TimeZoneNameInterpreter: + """Intepret a timezone specified as a military or civilian timezone name + + Return value is an offset from UTC given in seconds. + If a null-match is passed uses the passed defaultZone. + Returns values in seconds difference from UTC (negative + West) divided by the passed "seconds" argument. + """ + def __init__( self, defaultZone=LOCAL_ZONE, seconds=1.0): + """ + defaultZone -- ofset in seconds to be returned if there + is no value specified (null-match) + seconds -- divisor applied to the value before returning, + if you want hours, use 3600.0, if you want minutes, + use 60.0, if you want days (why?), use 86400.0 + """ + self.defaultZone = defaultZone + self.seconds = seconds + def __call__( self, info, buffer ): + (tag, left, right, children) = info + value = buffer[ left: right ] + if value: + try: + return timezone_mapping[ value ]/self.seconds + except KeyError: + raise ValueError( "Unrecognised (but parsed!) TimeZone Name %s found at character position %s"%(value, left)) + else: + return self.defaultZone/self.seconds diff -Nru simpleparse-2.1.0a1/simpleparse/dispatchprocessor.py simpleparse-2.2.0/simpleparse/dispatchprocessor.py --- simpleparse-2.1.0a1/simpleparse/dispatchprocessor.py 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/dispatchprocessor.py 2015-11-11 18:42:23.000000000 +0000 @@ -0,0 +1,107 @@ +"""Dispatch-processor API + +This is a post-processing processor API based on dispatching +each element of a result tree in a top-down recursive call +structure. It is the API used by the SimpleParseGrammar Parser, +and likely will be the default processor for SimpleParse. 
+""" +from simpleparse.processor import Processor + +class DispatchProcessor(Processor): + """Dispatch results-tree in a top-down recursive pattern with + attribute lookup to determine production -> method correspondence. + + To use the class, subclass it, then define methods for + processing each production. The methods should take this form: + def production_name( self, (tag, left, right, children), buffer): + pass + Where children may be either a list, or None, and buffer is the + entire buffer being parsed. + """ + def __call__( self, value, buffer ): + """Process the results of the parsing run over buffer + + Value can either be: (success, tags, next) for a top-level + production, or (tag, left, right, children) for a non-top + production. + """ + if len( value ) == 3: + # is a top-level production + success, tags, next = value + if success: + result = dispatchList( self, tags, buffer ) + return success, result, next + else: + return success, tags, next + else: + # is a 4-item result tuple/tree + return dispatch( self, value, buffer ) + + +def dispatch( source, tag, buffer ): + """Dispatch on source for tag with buffer + + Find the attribute or key tag[0] of source, + then call it with (tag, buffer) + """ + try: + function = getattr (source, tag[0]) + except AttributeError: + try: + function = source[tag[0]] + except: + raise AttributeError( '''No processing function for tag "%s" in object %s! Check the parser definition!'''%(tag[0], repr(source))) + return function( tag, buffer ) + +def dispatchList( source, taglist, buffer ): + """Dispatch on source for each tag in taglist with buffer""" + if taglist: + return list(map( dispatch, [source]*len(taglist), taglist, [buffer]*len(taglist))) + else: + return [] + +def multiMap( taglist, source=None, buffer=None ): + """Convert a taglist to a mapping from tag-object:[list-of-tags] + + For instance, if you have items of 3 different types, in any order, + you can retrieve them all sorted by type with multimap( childlist) + then access them by tagobject key. 
+ """ + set = {} + if not taglist: + return set + for tag in taglist: + key = tag[0] + if source and buffer: + tag = dispatch( source, tag, buffer ) + set.setdefault(key,[]).append( tag ) + return set +def singleMap( taglist, source=None, buffer=None ): + """Convert a taglist to a mapping from tag-object:tag, overwritting early with late tags""" + set = {} + if not taglist: + return set + for tag in taglist: + key = tag[0] + if source and buffer: + tag = dispatch( source, tag, buffer ) + set[key] = tag + return set + +def getString(info, buffer): + """Return the string value of the tag passed""" + (tag, left, right, sublist) = info + return buffer[ left:right ] + +try: + from simpleparse.stt.TextTools import countlines +except ImportError: + def lines( start=None, end=None, buffer=None ): + """Return line number in file at character index (string.count version)""" + return buffer.count('\n', start or 0, end or len(buffer)) +else: + def lines( start=None, end=None, buffer=None ): + """Return line number in file at character index (mx.TextTools version)""" + return countlines (buffer[start or 0:end or len(buffer)]) + + diff -Nru simpleparse-2.1.0a1/simpleparse/error.py simpleparse-2.2.0/simpleparse/error.py --- simpleparse-2.1.0a1/simpleparse/error.py 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/error.py 2015-11-11 18:42:23.000000000 +0000 @@ -0,0 +1,55 @@ +"""Definition of the ParserSyntaxError raised on parse failure""" +from simpleparse.stt.TextTools.TextTools import countlines + +class ParserSyntaxError( SyntaxError ): + """Sub-class of SyntaxError for use by SimpleParse parsers + + Every instance will have the following attributes: + buffer -- pointer to the source buffer + position -- integer position in buffer where error occured or -1 + production -- the production which failed + expected -- string (currently taken from grammar) describing + what production/element token failed to match + the following will be calculated in order to display + human-friendly error messages: + line -- ~ text line-number or -1 + lineChar -- ~ character on line where parsing failed or -1 + + """ + buffer = "" + position = -1 + line = -1 + production = "" + expected = "" + error_message = None + DEFAULTTEMPLATE = """Failed parsing production "%(production)s" @pos %(position)s (~line %(line)s:%(lineChar)s).\nExpected syntax: %(expected)s\nGot text: %(text)s""" + def __str__( self ): + """Create a string representation of the error""" + if self.error_message: + return '%s: %s'%( self.__class__.__name__, self.messageFormat(self.error_message) ) + else: + return '%s: %s'%( self.__class__.__name__, self.messageFormat() ) + def messageFormat( self, template=None): + """Create a default message for this syntax error""" + if template is None: + template = self.DEFAULTTEMPLATE + line, lineChar = self.getLineCoordinate() + variables = { + "production": self.production, + "position": self.position, + "line": line, + "lineChar": lineChar, + "expected": self.expected or "UNKNOWN", + "text": repr(self.buffer[ self.position:self.position+50 ]), + } + return template % variables + def getLineCoordinate( self ): + """Get (line number, line character) for the error""" + lineChar = self.buffer.rfind('\n', 0, self.position) + if lineChar == -1: # was no \n before the current position + lineChar = self.position + line = 1 + else: + line = countlines( self.buffer[:lineChar] ) + lineChar = self.position-lineChar + return line, lineChar diff -Nru simpleparse-2.1.0a1/simpleparse/generator.py 
simpleparse-2.2.0/simpleparse/generator.py --- simpleparse-2.1.0a1/simpleparse/generator.py 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/generator.py 2015-11-11 18:42:23.000000000 +0000 @@ -0,0 +1,152 @@ +"""Abstract representation of an in-memory grammar that generates parsers""" +from simpleparse.stt.TextTools import TextTools +import traceback + +class Generator: + '''Abstract representation of an in-memory grammar that generates parsers + + The generator class manages a collection of + ElementToken objects. These element token objects + allow the generator to be separated from the + particular parser associated with any particular EBNF + grammar. In fact, it is possible to create entire grammars + using only the generator objects as a python API. + ''' + def __init__( self ): + """Initialise the Generator""" + self.names = [] + self.rootObjects = [] + self.methodSource = None + self.definitionSources = [] + def getNameIndex( self, name ): + '''Return the index into the main list for the given name''' + try: + return self.names.index( name ) + except ValueError: + + for source in self.definitionSources: + if name in source: + return self.addDefinition( name, source[name]) +## import pdb +## pdb.set_trace() + raise NameError( '''The name %s is not defined within this generator'''%(repr(name)), self ) + def getRootObjects( self, ): + '''Return the list of root generator objects''' + return self.rootObjects + def getNames( self, ): + '''Return the list of root generator objects''' + return self.names + def getRootObject( self, name ): + """Get a particular root object by name""" + return self.getRootObjects()[ self.getNameIndex(name)] + + def addDefinition( self, name, rootElement ): + '''Add a new definition (object) to the generator''' + try: + self.names.index( name ) + raise NameError( '''Attempt to redefine an existing name %s'''%(name), self ) + except ValueError: + self.names.append( name ) + self.rootObjects.append( rootElement ) + return self.getNameIndex( name ) + def buildParser( self, name, methodSource=None ): + '''Build the given parser definition, returning a TextTools parsing tuple''' + self.parserList = [] + self.terminalParserCache = {} + self.methodSource = methodSource + i = 0 + while i < len(self.rootObjects): + # XXX Note: rootObjects will grow in certain cases where + # a grammar is loading secondary grammars into itself + rootObject = self.rootObjects[i] + try: + if len(self.parserList) <= i or self.parserList[i] is None: + parser = tuple(rootObject.toParser( self )) + self.setTerminalParser( i, parser ) + except NameError as err: + currentRuleName = self.names[i] + err.args = err.args + ('current declaration is %s'%(currentRuleName), ) + raise + i = i + 1 + assert None not in self.parserList, str( self.parserList) + return self.parserList [self.getNameIndex (name)] + def setTerminalParser( self, index, parser ): + """Explicitly set the parser value for given name""" + while index >= len(self.parserList): + self.parserList.append(None) + self.parserList[index] = parser + def getTerminalParser( self, index ): + """Try to retrieve a parser from the parser-list""" + try: + return self.parserList[ index ] + except IndexError: + return None + def cacheCustomTerminalParser( self, index, flags, parser ): + """Optimization to reuse customized terminal parsers""" + self.terminalParserCache[ (index,flags) ] = parser + def getCustomTerminalParser( self, index, flags ): + """Retrieved a cached customized terminal parser or None""" + return 
self.terminalParserCache.get( (index, flags))
+
+    def getParserList (self):
+        return self.parserList
+
+
+    def getObjectForName( self, name):
+        """Determine whether our methodSource has a parsing method for the given name
+
+        returns ( flags or 0 , tagobject)
+        """
+        testName = "_m_"+name
+        if hasattr( self.methodSource, testName):
+            method = getattr( self.methodSource, testName )
+            if callable(method):
+                return TextTools.CallTag, method
+            elif method == TextTools.AppendMatch:
+                return method, name
+            elif method in (TextTools.AppendToTagobj, TextTools.AppendTagobj):
+                object = self.getTagObjectForName( name )
+                if method == TextTools.AppendToTagobj:
+                    if not ( hasattr( object, 'append') and callable(object.append)):
+                        raise ValueError( """Method source %s declares production %s to use AppendToTagobj method, but doesn't give an object with an append method in _o_%s (gave %s)"""%(repr(self.methodSource), name,name, repr(object)))
+                return method, object
+            else:
+                raise ValueError( """Unrecognised command value %s (not callable, not one of the Append* constants) found in methodSource %s, name=%s"""%( repr(method),repr(self.methodSource),name))
+        return 0, name
+    def getTagObjectForName( self, name ):
+        """Get any explicitly defined tag object for the given name"""
+        testName = "_o_"+name
+        if hasattr( self.methodSource, testName):
+            object = getattr( self.methodSource, testName )
+            return object
+        return name
+    def addDefinitionSource( self, item ):
+        """Add a source for definitions when the current grammar doesn't supply
+        a particular rule (effectively common/shared items for the grammar)."""
+        self.definitionSources.append( item )
+
+
+### Compatibility API
+## This API exists to allow much of the code written with SimpleParse 1.0
+## to work with SimpleParse 2.0
+class GeneratorAPI1:
+    """Stand-in class supporting operation of SimpleParse 1.0 applications
+
+    There was really only the one method of interest, parserbyname,
+    everything else was internal (and is now part of
+    simpleparsegrammar.py).
+    """
+    def __init__( self, production, prebuilt=() ):
+        from simpleparse.parser import Parser
+        self.parser = Parser( production, prebuilts=prebuilt )
+    def parserbyname( self, name ):
+        """Retrieve a tag-table by production name"""
+        return self.parser.buildTagger( name )
+
+def buildParser( declaration, prebuiltnodes=() ):
+    """API 1.0 primary entry point, returns a GeneratorAPI1 instance
+
+    That object will respond to the parserbyname API expected by
+    SimpleParse 1.0 applications.
+    """
+    return GeneratorAPI1( declaration, prebuiltnodes )
diff -Nru simpleparse-2.1.0a1/simpleparse/__init__.py simpleparse-2.2.0/simpleparse/__init__.py
--- simpleparse-2.1.0a1/simpleparse/__init__.py 1970-01-01 00:00:00.000000000 +0000
+++ simpleparse-2.2.0/simpleparse/__init__.py 2015-11-11 18:42:23.000000000 +0000
@@ -0,0 +1,7 @@
+'''Simple parsing using mxTextTools
+
+See the /doc subdirectory for introductory and
+general documentation. See license.txt for licensing
+information. (This is a BSD-licensed package).
+''' +__version__="2.2.0" diff -Nru simpleparse-2.1.0a1/simpleparse/objectgenerator.py simpleparse-2.2.0/simpleparse/objectgenerator.py --- simpleparse-2.1.0a1/simpleparse/objectgenerator.py 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/objectgenerator.py 2015-11-11 18:42:23.000000000 +0000 @@ -0,0 +1,783 @@ +"""Object-oriented tag-table generator objects + +The objectgenerator module is the core of the SimpleParse +system, the various element token classes defined here +implement transitions from EBNF-style abstractions into +the low-level (assembly-like) instructions to the +TextTools engine. + +Each class within the module is a sub-class of ElementToken, +which provides a number of common facilities, the most +obvious of which is the permute method, which takes care of +the negative, optional, and repeating flags for the normal +case (with character ranges and literals being non-normal). +""" +from __future__ import print_function + +from simpleparse.stt.TextTools.TextTools import * + +### Direct use of BMS is deprecated now... +try: + TextSearch +except NameError: + TextSearch = BMS + +from simpleparse.error import ParserSyntaxError +import copy + +class ElementToken: + """Abstract base class for all ElementTokens + + Common Attributes: + + negative -- the element token should match + a character if the "base" definition + would not match at the current position + optional -- the element token will match even + if the base definition would not match + at the current position + repeating -- if the element is successfully + matched, attempt to match it again. + lookahead -- if true, the scanning position + of the engine will be reset after the + element matches + errorOnFail -- if true, the engine will call the + object stored in errorOnFail as a text- + matching object iff the element token fails + to match. This is used to signal + SyntaxErrors. + + Attributes only used for top-level Productions: + + report -- if true, the production's results + will be added to the result tree + expanded -- if true, the production's children's + results will be added to the result tree + but the production's own result will be ignored + """ + negative = 0 + optional = 0 + repeating = 0 + report = 1 + # note that optional and errorOnFail are mutually exclusive + errorOnFail = None + # any item may be marked as expanded, + # which says that it's a top-level declaration + # and that links to it should automatically expand + # as if the name wasn't present... + expanded = 0 + lookahead = 0 + + + def __init__( self, **namedarguments ): + """Initialize the object with named attributes + + This method simply takes the named attributes and + updates the object's dictionary with them + """ + self.__dict__.update( namedarguments ) + def toParser( self, generator, noReport=0 ): + """Abstract interface for implementing the conversion to a text-tools table + + generator -- an instance of generator.Generator + which provides various facilities for discovering + other productions. + noReport -- if true, we're being called recursively + for a terminal grammar fragment where one of our + parents has explicitly suppressed all reporting. + + This method is called by the generator or by + another element-token's toParser method. 
+ """ + raise NotImplementedError( '''Element token generator abstract function called''' ) + def permute( self, basetable ): + '''Given a positive, required, non-repeating table, convert to appropriately configured table + + This method applies generic logic for applying the + operational flags to a basic recipe for an element. + + It is normally called from the elements-token's own + toParser method. + ''' + flags = 0 + if self.lookahead: + flags = flags + LookAhead + + assert len(basetable) == 3, '''Attempt to permute a base table that already has fail flag set, can only permute unadorned tables''' + if self.negative: + # negative "matches" if it fails + # we add in the flags while we're at it... + basetable = (None, SubTable+flags, ( + basetable + (1,2), + (None, EOF, Here,2,1), # if we hit eof, this didn't match, otherwise, we matched + (None, Fail, Here),# either hit eof or matched the client + (None,Skip,1), + )) + elif flags: + # unpack, add the flags, and repack + tag, command, arg = basetable + basetable = ( tag, command+flags, arg) + + if self.repeating: + ### There are a number of problems with repetition that we'd like to solve + ### via recursive table calls, but those are very expensive in the current + ### implementation, so we need to use something a little more hacky... + if self.optional: + return [ + ## this would be the "simplistic" implementation... + ## basetable + (1,0) + ## it doesn't work because of cases + ## where all-optional children "succeed" without consuming + ## when within a repeating parent + ## the EOF test isn't enough to fix the problem, + ## as it's only checking a common case, not the underlying failure + basetable +(2,1), # fail, done, succeed, check for eof and if not, try matching again + # if we hit eof, no chance of further matches, + # consider ourselves done + (None, EOF, Here,-1,1), + ] + elif self.errorOnFail: + return [ + basetable+(1,2), + (None, Call, self.errorOnFail), + # as for optional... + basetable +(2,1), + (None, EOF, Here,-1,1), + ] + else: + return [ + basetable, + # as for optional... + basetable +(2,1), + (None, EOF, Here,-1,1), + ] + else: # single + if self.optional: + return [ + basetable +(1,1) + ] + elif self.errorOnFail: + return [ + basetable+(1,2), + (None, Call, self.errorOnFail), + ] + else: # not optional + return [ + basetable + ] + def __repr__( self): + """Return a readily recognisable version of ourself""" + from simpleparse import printers + return printers.asObject( self ) + def terminal (self, generator): + """Determine if this element is terminal for the generator""" + return 0 + + +class Literal( ElementToken ): + """Literal string value to be matched + + Literals are one of the most common elements within + any grammar. 
The implementation tries to use the + most efficient mechanism available for matching/searching + for a literal value, so the Literal class does not + use the permute method, instead defining explicit + parsing methodologies for each flag and value combination + + Literals in the SimpleParse EBNF grammar are defined like so: + "test", "test"?, "test"*, "test"+ + -"test", -"test"?, -"test"*, -"test"+ + + Attributes: + value -- a string storing the literal's value + + Notes: + Currently we don't support Unicode literals + + See also: + CILiteral -- case-insensitive Literal values + """ + value = "" + def toParser( self, generator=None, noReport=0 ): + """Create the parser for the element token""" + flags = 0 + if self.lookahead: + flags = flags + LookAhead + base = self.baseToParser( generator ) + if flags or self.errorOnFail: + if self.errorOnFail: + return [(None, SubTable+flags, tuple(base),1,2),(None, Call, self.errorOnFail)] + else: + return [(None, SubTable+flags, tuple(base))] + else: + return base + def baseToParser( self, generator=None ): + """Parser generation without considering flag settings""" + svalue = self.value + if self.negative: + if self.repeating: # a repeating negative value, a "search" in effect + if self.optional: # if fails, then go to end of file + return [ (None, sWordStart, TextSearch( svalue ),1,2), (None, Move, ToEOF ) ] + else: # must first check to make sure the current position is not the word, then the same + return [ + (None, Word, svalue, 2,1), + (None, Fail, Here), + (None, sWordStart, TextSearch( svalue ),1,2), + (None, Move, ToEOF ) + ] + #return [ (None, Word, svalue, 2,1),(None, Fail, Here),(None, WordStart, svalue,1,2), (None, Move, ToEOF ) ] + else: # a single-character test saying "not a this" + if self.optional: # test for a success, move back if success, move one forward if failure + if len(svalue) > 1: + return [ (None, Word, svalue, 2,1), + (None, Skip, -len(svalue), 2,2), # backup if this was the word to start of word, succeed + (None, Skip, 1 ) ] # else just move one character and succeed + else: # Uses Is test instead of Word test, should be faster I'd imagine + return [ (None, Is, svalue, 2,1), + (None, Skip, -1, 2,2), # backtrack + (None, Skip, 1 ) ] # else just move one character and succeed + else: # must find at least one character not part of the word, so + if len(svalue) > 1: + return [ (None, Word, svalue, 2,1), + (None, Fail, Here), + (None, Skip, 1 ) ] # else just move one character and succeed + else: #must fail if it finds or move one forward + return [ (None, Is, svalue, 2,1), + (None, Fail, Here), + (None, Skip, 1 ) ] # else just move one character and succeed + else: # positive + if self.repeating: + if self.optional: + if len(svalue) > 1: + return [ (None, Word, svalue, 1,0) ] + else: + return [ (None, Is, svalue, 1,0) ] + else: # not optional + if len(svalue) > 1: + return [ (None, Word, svalue),(None, Word, svalue,1,0) ] + else: + return [ (None, Is, svalue),(None, Is, svalue,1,0) ] + else: # not repeating + if self.optional: + if len(svalue) > 1: + return [ (None, Word, svalue, 1,1) ] + else: + return [ (None, Is, svalue, 1,1) ] + else: # not optional + if len(svalue) > 1: + return [ (None, Word, svalue) ] + else: + return [ (None, Word, svalue) ] + def terminal (self, generator): + """Determine if this element is terminal for the generator""" + return 1 + +class _Range( ElementToken ): + """Range of character values where any one of the characters may match + + The Range token allows you to define a set of characters + 
(using a mini-grammar) of which any one may match. By using + the repetition flags, it is possible to easily create such + common structures as "names" and "numbers". For example: + + name := [a-zA-Z]+ + number := [0-9.eE]+ + + (Note: those are not beautifully defined examples :) ). + + The mini-grammar for the simpleparsegrammar is defined as follows: + + '[',CHARBRACE?,CHARDASH?, (CHARRANGE/CHARNOBRACE)*, CHARDASH?,']' + + that is, if a literal ']' character is wanted, you must + define the character as the first item in the range. A literal + '-' character must appear as the first character after any + literal ']' character (or the beginning of the range) or as the + last character in the range. + + Note: The expansion from the mini-grammar occurs before the + Range token is created (the simpleparse grammar does the + expansion), so the value attribute of the token is actually + the expanded string of characters. + """ + value = "" + requiresExpandedSet = 1 + def toParser( self, generator=None, noReport=0 ): + """Create the parser for the element token""" + flags = 0 + if self.lookahead: + flags = flags + LookAhead + base = self.baseToParser( generator ) + if flags or self.errorOnFail: + if self.errorOnFail: + return [(None, SubTable+flags, tuple(base),1,2),(None, Call, self.errorOnFail)] + else: + return [(None, SubTable+flags, tuple(base))] + else: + return base + +# this should be a faster and more generic character set +# approach, but there's a bug with mxTextTools b3 which makes +# it non-functional, so for now I'm using the old version. +# Eventually this should also support the Unicode character sets +##try: +## CharSet +## class Range( _Range ): +## """Range type using the CharSet feature of mx.TextTools 2.1.0 +## +## The CharSet type allows for both Unicode and 256-char strings, +## so we can use it as our 2.1.0 primary parsing mechanism. +## It also allows for simpler definitions (doesn't require that +## we pre-exand the character set). That's going to require support +## in the SimpleParse grammar, of course. +## """ +## requiresExpandedSet = 0 +## def baseToParser( self, generator=None ): +## """Parser generation without considering flag settings""" +## svalue = self.value +## print 'generating range for ', repr(svalue) +## if not svalue: +## raise ValueError( '''Range defined with no member values, would cause infinite loop %s'''%(self)) +## if self.negative: +## svalue = '^' + svalue +## print ' generated', repr(svalue) +## svalue = CharSet(svalue) +## if self.repeating: +## if self.optional: +## return [ (None, AllInCharSet, svalue, 1 ) ] +## else: # not optional +## #return [ (None, AllInSet, svalue ) ] +## return [ (None, AllInCharSet, svalue ) ] +## else: # not repeating +## if self.optional: +## #return [ (None, IsInSet, svalue, 1 ) ] +## return [ (None, IsInCharSet, svalue, 1 ) ] +## else: # not optional +## #return [ (None, IsInSet, svalue ) ] +## return [ (None, IsInCharSet, svalue ) ] +##except NameError: +class Range( _Range ): + """Range type which doesn't use the CharSet features in mx.TextTools + + This is likely to be much slower than the CharSet version (below), and + is unable to handle unicode character sets. However, it will work with + TextTools 2.0.3, which may be needed in some cases. 
+ """ + def baseToParser( self, generator=None ): + """Parser generation without considering flag settings""" + svalue = self.value + if not svalue: + raise ValueError( '''Range defined with no member values, would cause infinite loop %s'''%(self)) + if self.negative: + if self.repeating: + if self.optional: + #return [ (None, AllInSet, svalue, 1 ) ] + return [ (None, AllNotIn, svalue, 1 ) ] + else: # not optional + #return [ (None, AllInSet, svalue ) ] + return [ (None, AllNotIn, svalue ) ] + else: # not repeating + if self.optional: + #return [ (None, IsInSet, svalue, 1 ) ] + return [ (None, IsNotIn, svalue, 1 ) ] + else: # not optional + #return [ (None, IsInSet, svalue ) ] + return [ (None, IsNotIn, svalue ) ] + else: + if self.repeating: + if self.optional: + #return [ (None, AllInSet, svalue, 1 ) ] + return [ (None, AllIn, svalue, 1 ) ] + else: # not optional + #return [ (None, AllInSet, svalue ) ] + return [ (None, AllIn, svalue ) ] + else: # not repeating + if self.optional: + #return [ (None, IsInSet, svalue, 1 ) ] + return [ (None, IsIn, svalue, 1 ) ] + else: # not optional + #return [ (None, IsInSet, svalue ) ] + return [ (None, IsIn, svalue ) ] + def terminal (self, generator): + """Determine if this element is terminal for the generator""" + return 1 + +class Group( ElementToken ): + """Abstract base class for all group element tokens + + The primary feature of a group is that it has a set + of element tokens stored in the attribute "children". + """ + children = () + terminalValue = None + def terminal (self, generator): + """Determine if this element is terminal for the generator""" + if self.terminalValue in (0,1): + return self.terminalValue + self.terminalValue = 0 + for item in self.children: + if not item.terminal( generator): + return self.terminalValue + self.terminalValue = 1 + return self.terminalValue + +class SequentialGroup( Group ): + """A sequence of element tokens which must match in a particular order + + A sequential group must match each child in turn + and all children must be satisfied to consider the + group matched. + + Within the simpleparsegrammar, the sequential group + is defined like so: + ("a", b, c, "d") + i.e. a series of comma-separated element token definitions. + """ + def toParser( self, generator=None, noReport=0 ): + elset = [] + for child in self.children: + elset.extend( child.toParser( generator, noReport ) ) + basic = self.permute( (None, SubTable, tuple( elset)) ) + if len(basic) == 1: + first = basic[0] + if len(first) == 3 and first[0] is None and first[1] == SubTable: + return tuple(first[2]) + return basic + +class CILiteral( SequentialGroup ): + """Case-insensitive Literal values + + The CILiteral is a sequence of literal and + character-range values, where each element is + positive and required. Literal values are + composed of those characters which are not + upper-case/lower-case pairs, while the ranges + are all two-character ranges with the upper + and lower forms. + + CILiterals in the SimpleParse EBNF grammar are defined like so: + c"test", c"test"?, c"test"*, c"test"+ + -c"test", -c"test"?, -c"test"*, -c"test"+ + + Attributes: + value -- a string storing the literal's value + + Notes: + Currently we don't support Unicode literals + + A CILiteral will be *much* slower than a + regular literal or character range + """ + value = "" + def toParser( self, generator=None, noReport=0 ): + elset = self.ciParse( self.value ) + if len(elset) == 1: + # XXX should be compressing these out during optimisation... 
+ # pointless declaration of case-insensitivity, + # or a single-character value + pass + basic = self.permute( (None, SubTable, tuple( elset)) ) + if len(basic) == 1: + first = basic[0] + if len(first) == 3 and first[0] is None and first[1] == SubTable: + return tuple(first[2]) + return basic + def ciParse( self, value ): + """Break value into set of case-dependent groups...""" + def equalPrefix( a,b ): + for x in range(len(a)-1): + if a[x] != b[x]: + return x + result = [] + a,b = value.upper(), value.lower() + while a and b: + # is there an equal literal run at the start? + stringPrefix = equalPrefix( a,b ) + if stringPrefix: + result.append( (None, Word, a[:stringPrefix]) ) + a,b = a[stringPrefix:],b[stringPrefix:] + # if we hit the end of the string, that's fine, just return + if not a and b: + break + # otherwise, the next character must be a case-differing pair + result.append( (None, IsIn, a[0]+b[0]) ) + a,b = a[1:], b[1:] + return result + + +class ErrorOnFail(ElementToken): + """When called as a matching function, raises a SyntaxError + + Attributes: + expected -- list of strings describing expected productions + production -- string name of the production that's failing to parse + message -- overrides default message generation if non-null + + + (something,something)+! + (something,something)! + (something,something)+!"Unable to parse somethings in my production" + (something,something)!"Unable to parse somethings in my production" + + if string -> give an explicit message (with optional % values) + else -> use a default string + + """ + production = "" + message = "" + expected = "" + def __call__( self, text, position, end ): + """Method called by mxTextTools iff the base production fails""" + error = ParserSyntaxError( self.message ) + error.error_message = self.message + error.production = self.production + error.expected= self.expected + error.buffer = text + error.position = position + raise error + def copy( self ): + import copy + return copy.copy( self ) + + + + +class FirstOfGroup( Group ): + """Set of tokens that matches (and stops searching) with the first successful child + + A FirstOf group attempts to match each child in turn, + declaring success with the first successful child, + or failure if none of the children match. + + Within the simpleparsegrammar, the FirstOf group + is defined like so: + ("a" / b / c / "d") + i.e. a series of slash-separated element token definitions. + """ + def toParser( self, generator=None, noReport=0 ): + elset = [] + # should catch condition where a child is optional + # and we are repeating (which causes a crash during + # parsing), but doing so is rather complex and + # requires analysis of the whole grammar. 
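        # For illustration, a hypothetical declaration such as
        #     wordish := ("a"? / "b")*
        # makes the first FirstOf child optional; because it can "succeed"
        # without consuming any input, a repeating parent could loop forever,
        # which is what the assertion below guards against.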
+ for el in self.children: + assert not el.optional, """Optional child of a FirstOf group created, this would cause an infinite recursion in the engine, child was %s"""%el + dataset = el.toParser( generator, noReport ) + if len( dataset) == 1:# and len(dataset[0]) == 3: # we can alter the jump states with impunity + elset.append( dataset[0] ) + else: # for now I'm eating the inefficiency and doing an extra SubTable for all elements to allow for easy calculation of jumps within the FO group + elset.append( (None, SubTable, tuple( dataset )) ) + + procset = [] + for i in range( len( elset) -1): # note that we have to treat last el specially + procset.append( elset[i] + (1,len(elset)-i) ) # if success, jump past end + procset.append( elset[-1] ) # will cause a failure if last element doesn't match + procset = tuple(procset) + + basetable = (None, SubTable, procset ) + return self.permute( basetable ) + +class Prebuilt( ElementToken ): + """Holder for pre-built TextTools tag tables + + You can pass in a Pre-built tag table when + creating your grammar, doing so creates + Prebuilt element tokens which can be referenced + by the other element tokens in your grammar. + """ + value = () + def toParser( self, generator=None, noReport=0 ): + return self.value +class LibraryElement( ElementToken ): + """Holder for a prebuilt item with it's own generator""" + generator = None + production = "" + methodSource = None + def toParser( self, generator=None, noReport=0 ): + if self.methodSource is None: + source = generator.methodSource + else: + source = self.methodSource + basetable = self.generator.buildParser( self.production, source ) + try: + if type(basetable[0]) == type(()): + if len(basetable) == 1 and len(basetable[0]) == 3: + basetable = basetable[0] + else: + # this is a table that got returned! + basetable = (None, SubTable, basetable) + return self.permute( basetable ) + except: + print(basetable) + raise + +class Name( ElementToken ): + """Reference to another rule in the grammar + + The Name element token allows you to reference another + production within the grammar. There are three major + sub-categories of reference depending on both the Name + element token and the referenced table's values. + + if the Name token's report attribute is false, + or the target table's report attribute is false, + or the Name token negative attribute is true, + the Name reference will report nothing in the result tree + + if the target's expand attribute is true, however, + the Name reference will report the children + of the target production without reporting the + target production's results (SubTable match) + + finally: + if the target is not expanded and the Name token + should report something, the generator object is + asked to supply the tag object and flags for + processing the results of the target. See the + generator.MethodSource documentation for details. + + Notes: + expanded and un-reported productions won't get any + methodsource methods called when + they are finished, that's just how I decided to + do it, not sure if there's some case where you'd + want it. As a result, it's possible to have a + method getting called for one instance (where a + name ref is reporting) and not for another (where + the name ref isn't reporting). + """ + value = "" + # following two flags are new ideas in the rewrite... + report = 1 + def toParser( self, generator, noReport=0 ): + """Create the table for parsing a name-reference + + Note that currently most of the "compression" optimisations + occur here. 
+ """ + sindex = generator.getNameIndex( self.value ) + command = TableInList + target = generator.getRootObjects()[sindex] + + reportSelf = ( + (not noReport) and # parent hasn't suppressed reporting + self.report and # we are not suppressing ourselves + target.report and # target doesn't suppress reporting + (not self.negative) and # we aren't a negation, which doesn't report anything by itself + (not target.expanded) # we don't report the expanded production + ) + reportChildren = ( + (not noReport) and # parent hasn't suppressed reporting + self.report and # we are not suppressing ourselves + target.report and # target doesn't suppress reporting + (not self.negative) # we aren't a negation, which doesn't report anything by itself + ) + if reportSelf: + svalue = self.value + else: + svalue = None + + flags = 0 + if target.expanded: + # the target is the root of an expandedname declaration + # so we need to do special processing to make sure that + # it gets properly reported... + command = SubTableInList + tagobject = None + # check for indirected reference to another name... + elif not reportSelf: + tagobject = svalue + else: + flags, tagobject = generator.getObjectForName( svalue ) + if flags: + command = command | flags + if tagobject is None and not flags: + if self.terminal(generator): + if extractFlags(self,reportChildren) != extractFlags(target): + composite = compositeFlags(self,target, reportChildren) + partial = generator.getCustomTerminalParser( sindex,composite) + if partial is not None: + return partial + partial = tuple(copyToNewFlags(target, composite).toParser( + generator, + not reportChildren + )) + generator.cacheCustomTerminalParser( sindex,composite, partial) + return partial + else: + partial = generator.getTerminalParser( sindex ) + if partial is not None: + return partial + partial = tuple(target.toParser( + generator, + not reportChildren + )) + generator.setTerminalParser( sindex, partial) + return partial + # base, required, positive table... 
+ if ( + self.terminal( generator ) and + (not flags) and + isinstance(target, (SequentialGroup,Literal,Name,Range)) + ): + partial = generator.getTerminalParser( sindex ) + if partial is None: + partial = tuple(target.toParser( + generator, + #not reportChildren + )) + generator.setTerminalParser( sindex, partial) + if len(partial) == 1 and len(partial[0]) == 3 and ( + partial[0][0] is None or tagobject is None + ): + # there is a single child + # it doesn't report anything, or we don't + partial = (partial[0][0] or tagobject,)+ partial[0][1:] + else: + partial = (tagobject, Table, tuple(partial)) + return self.permute( partial ) + basetable = ( + tagobject, + command, ( + generator.getParserList (), + sindex, + ) + ) + return self.permute( basetable ) + terminalValue = None + def terminal (self, generator): + """Determine if this element is terminal for the generator""" + if self.terminalValue in (0,1): + return self.terminalValue + self.terminalValue = 0 + target = generator.getRootObject( self.value ) + if target.terminal( generator): + self.terminalValue = 1 + return self.terminalValue + + +def extractFlags( item, report=1 ): + """Extract the flags from an item as a tuple""" + return ( + item.negative, + item.optional, + item.repeating, + item.errorOnFail, + item.lookahead, + item.report and report, + ) +def compositeFlags( first, second, report=1 ): + """Composite flags from two items into overall flag-set""" + result = [] + for a,b in zip(extractFlags(first, report), extractFlags(second, report)): + result.append( a or b ) + return tuple(result) +def copyToNewFlags( target, flags ): + """Copy target using combined flags""" + new = copy.copy( target ) + for name,value in zip( + ("negative","optional","repeating","errorOnFail","lookahead",'report'), + flags, + ): + setattr(new, name,value) + return new diff -Nru simpleparse-2.1.0a1/simpleparse/parser.py simpleparse-2.2.0/simpleparse/parser.py --- simpleparse-2.1.0a1/simpleparse/parser.py 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/parser.py 2015-11-11 18:42:23.000000000 +0000 @@ -0,0 +1,49 @@ +"""Real-world parsers using the SimpleParse EBNF""" +from simpleparse import baseparser, simpleparsegrammar, common + +class Parser( baseparser.BaseParser ): + """EBNF-generated Parsers with results-handling + + The Parser is a two-stage object: + Passed an EBNF definition during initialisation, + it compiles the definition into a tagging table + (which in turn requires creating a tagging table + for parsing the EBNF). + + You then call the parser's parse method to + perform the actual parsing of your data, with the + parser passing the results to your processor object + and then back to you. 
+ """ + def __init__( + self, declaration, root='root', + prebuilts=(), + definitionSources=common.SOURCES, + ): + """Initialise the parser, creating the tagging table for it + + declaration -- simpleparse ebnf declaration of the language being parsed + root -- root production used for parsing if none explicitly specified + prebuilts -- sequence of (name,value) tuples with prebuilt tables, values + can be either objectgenerator EventToken sub-classes or TextTools + tables + definitionSources -- dictionaries of common constructs for use + in building your grammar + """ + self._rootProduction = root + self._declaration = declaration + self._generator = simpleparsegrammar.Parser( + declaration, prebuilts, + definitionSources = definitionSources, + ).generator + def buildTagger( self, production=None, processor=None): + """Get a particular parsing table for a particular production""" + if production is None: + production = self._rootProduction + if processor is None: + processor = self.buildProcessor() + return self._generator.buildParser( + production, + methodSource=processor, + ) + \ No newline at end of file diff -Nru simpleparse-2.1.0a1/simpleparse/printers.py simpleparse-2.2.0/simpleparse/printers.py --- simpleparse-2.1.0a1/simpleparse/printers.py 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/printers.py 2015-11-11 18:42:23.000000000 +0000 @@ -0,0 +1,58 @@ +"""Utility to print Python code for a given generator object's element tokens""" +class _GeneratorFormatter: + """Singleton Class to give a generator's element tokens as a source string + + Call this as: + printers.asGenerator( generator ) to get a Python source string + that tries to recreate the generator as a set of objectgenerator + element token objects (as seen in simpleparsegrammar). + """ + HEAD = """from simpleparse import generator +from simpleparse.objectgenerator import * +GENERATOR = generator.Generator () + +class Parser: + '''Mix-in class for simpleparse.parser.Parser which uses this GENERATOR + to build tagging tables. 
You'll likely want to override __init__ to avoid + building a new parser from a grammar (or subclass BaseParser instead of + Parser) + ''' + def buildTagger( self, name=None, processor = None ): + '''Build the tag-table for parsing the EBNF for this parser''' + return GENERATOR.buildParser( name, processor ) + +""" + ITEM = """GENERATOR.addDefinition( + %(name)s, + %(element)s, +) +""" + def __call__( self, generator ): + temp = [self.HEAD] + for name,element in zip(generator.getNames(), generator.getRootObjects()): + name = repr(name) + element = self.reprObject(element,1) + temp.append( self.ITEM%locals()) + return "".join(temp) + def reprObject( self, obj, depth=0, indent=' ' ): + """Return a recognisable version of an objectgenerator element token""" + argTemplate = (indent*(depth+1))+"%s = %s," + temp = ["""%s("""%(obj.__class__.__name__)] + for key,value in list(obj.__dict__.items()): + if key == 'children': + childTemplate = (indent*(depth+2)) + '%s,' + childTemp = ["["] + for child in value: + childTemp.append(childTemplate%self.reprObject(child,depth+2)) + childTemp.append( (indent*(depth+1))+']' ) + + temp.append( + argTemplate% (key, '\n'.join(childTemp)) + ) + else: + temp.append( argTemplate%( key, repr(value))) + temp.append( (indent*depth)+')') + return '\n'.join(temp) + +asGenerator = _GeneratorFormatter() +asObject = asGenerator.reprObject diff -Nru simpleparse-2.1.0a1/simpleparse/processor.py simpleparse-2.2.0/simpleparse/processor.py --- simpleparse-2.1.0a1/simpleparse/processor.py 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/processor.py 2015-11-11 18:42:23.000000000 +0000 @@ -0,0 +1,49 @@ +"""Definitions of the MethodSource and Processor APIs""" + +class MethodSource(object): + """Base class for MethodSource objects (including Processors and Parsers) + Most applications will use either Processor or Parser objects, rather + than directly using a MethodSource object. + + The MethodSource is basically just a generic object whose attributes + are accessed during generation and/or post-processing of parse results. + The following are the special attribute forms for use in + + _m_productionname -- alters the method used in the TextTools + engine for storing results. If this is a callable object, + then call the object with: + object( taglist,text,l,r,subtags ) + + If it is TextTools.AppendToTagobj, then append the result + tuple to the associated object (_o_productionname). This + requires that _o_productionname have an "append" method, + obviously. + + If it is the constant TextTools.AppendMatch, then append + the string value which matched the production. + + If it is TextTools.AppendTagobj, then append the associated + tagobject itself to the results tree. + + _o_productionname -- with AppendToTagobj, AppendTagobj and + cases where there is no _m_productionname defined, this + allows you to provide an explicit tagobject for reporting + in the results tree/getting called with results. + """ + + + +class Processor(MethodSource): + """Provides definition of a generic processing API + + Basically, a Processor has a method __call__ which takes + two arguments, a value (which is either a 3-tuple or a 4-tuple + depending on whether a top-level production is being processed), + and a pointer to the buffer being parsed. 
+ """ + def __call__( self, value, buffer ): + """Process the results of a parsing run over buffer""" + return value + def __repr__( self ): + """Return a representation of the class""" + return "<%s object @ %s>"%( self.__class__.__name__, id(self)) diff -Nru simpleparse-2.1.0a1/simpleparse/simpleparsegrammar.py simpleparse-2.2.0/simpleparse/simpleparsegrammar.py --- simpleparse-2.1.0a1/simpleparse/simpleparsegrammar.py 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/simpleparsegrammar.py 2015-11-11 18:42:23.000000000 +0000 @@ -0,0 +1,725 @@ +'''Default SimpleParse EBNF grammar as a generator with productions + +This module defines the original SimpleParse +grammar. It uses the generator objects directly +as this is the first grammar being written. +''' +from simpleparse.objectgenerator import * +from simpleparse import generator, baseparser +from simpleparse.dispatchprocessor import * + +try: + _unichr = unichr + _unicode = unicode +except NameError: + _unichr = chr + _unicode = str + +# note that whitespace is slightly different +# due to a bug with NULL-matching repeating groups +# we make all the ts references ts? +whitespace = Name (value = "ts", report = 0) +element_token = Name( value = "element_token" ) +literal = Name ( value = "literal") +group = Name ( value = "group") +characterrange = Name ( value = "range") +name = Name ( value = "name") + + +SPGenerator = generator.Generator () + +SPGenerator.addDefinition( + "declarationset", + Name (value = "declaration", repeating = 1), +) + + + +SPGenerator.addDefinition ( + "declaration", + SequentialGroup ( + children = [ + whitespace, + FirstOfGroup ( + children = [ + Name (value = "unreportedname", ), + Name (value = "expandedname", ), + Name (value = "name", ), + ], + ), + whitespace, + Literal (value = ":"), + Literal (value = ":", optional=1), + Literal (value = "=",), + Name( value = "seq_group"), + ], + ) +) + +SPGenerator.addDefinition ( + "group", + SequentialGroup ( + children = [ + Literal (value ="("), + Name( value= "seq_group"), + Literal (value =")"), + ], + expanded = 1, + ) +) + +_seq_children = FirstOfGroup( + children = [ + Name(value="error_on_fail"), + Name(value="fo_group"), + Name(value="element_token"), + ], +) + +SPGenerator.addDefinition ( + "seq_group", + SequentialGroup ( + children = [ + whitespace, + _seq_children, + SequentialGroup( + children = [ + whitespace, + Name( value="seq_indicator"), + whitespace, + _seq_children, + ], + repeating = 1, optional = 1, + ), + whitespace, + ], + ), +) + +SPGenerator.addDefinition ( + "fo_group", + SequentialGroup ( + children = [ + element_token, + SequentialGroup( + children = [ + whitespace, + Name( value="fo_indicator"), + whitespace, + element_token, + ], + repeating = 1, + ), + ], + ) +) +SPGenerator.addDefinition ( + "seq_indicator", + Literal(value = ",", report=0 ), +) +SPGenerator.addDefinition ( + "fo_indicator", + Literal(value = "/", report=0 ), +) + +SPGenerator.addDefinition ( + "element_token", + SequentialGroup ( + children = [ + Name (value = "lookahead_indicator", optional = 1), + whitespace, + Name (value = "negpos_indicator", optional = 1), + whitespace, + FirstOfGroup ( + children = [ + literal, + characterrange, + group, + name, + ] + ), + whitespace, + Name (value = "occurence_indicator", optional = 1), + whitespace, + Name (value = "error_on_fail", optional = 1), + ] + ) +) + +SPGenerator.addDefinition ( + "negpos_indicator", + Range (value = "+-" ) +) +SPGenerator.addDefinition ( + "lookahead_indicator", + Literal(value 
= "?" ), +) + +SPGenerator.addDefinition ( + "occurence_indicator", + Range (value = "+*?" ), +) +SPGenerator.addDefinition ( + "error_on_fail", + SequentialGroup ( + children = [ + Literal (value ="!"), + SequentialGroup ( + children = [ + whitespace, + Name( value="literal"), + ], + optional = 1, + ), + ], + ), +) + +SPGenerator.addDefinition ( + "unreportedname", + SequentialGroup ( + children = [ + Literal (value ="<"), + whitespace, + name, + whitespace, + Literal (value =">"), + ] + ) +) +SPGenerator.addDefinition ( + "expandedname", + SequentialGroup ( + children = [ + Literal (value =">"), + whitespace, + name, + whitespace, + Literal (value ="<"), + ] + ) +) + +SPGenerator.addDefinition ( + "name", + SequentialGroup ( + children = [ + Range(value ='abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'), + Range(value ='abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789', optional= 1, repeating= 1), + ] + ) +) + +SPGenerator.addDefinition ( + "ts", # ( [ \011-\015]+ / ('#',-'\n'+,'\n')+ )* + FirstOfGroup ( + children = [ + Range(value =' \011\012\013\014\015', repeating=1), + Name( value = "comment" ), + ], + repeating = 1, optional=1, + ) +) +SPGenerator.addDefinition ( + "comment", # ( [ \011-\015]+ / ('#',-'\n'+,'\n')+ )* + SequentialGroup ( + children = [ + Literal ( value ="#"), + Literal (value ="\n", negative = 1, repeating = 1, optional=1), + Literal (value = "\n",), + ], + ), +) + +SPGenerator.addDefinition ( + "literalDecorator", # literalDecorator := [c] + Range( value = 'c' ) +) + +SPGenerator.addDefinition ( + "literal", # ("'",(CHARNOSNGLQUOTE/ESCAPEDCHAR)*,"'") / ('"',(CHARNODBLQUOTE/ESCAPEDCHAR)*,'"') + SequentialGroup( + children = [ + Name( value = 'literalDecorator', optional=1 ), + FirstOfGroup ( + children = [ + SequentialGroup ( + children = [ + Literal (value ="'"), + FirstOfGroup ( + children = [ + Name (value = "CHARNOSNGLQUOTE"), + Name (value = "ESCAPEDCHAR"), + ], + optional = 1, repeating = 1, + ), + Literal (value ="'"), + ], + ), + SequentialGroup ( + children = [ + Literal (value ='"'), + FirstOfGroup ( + children = [ + Name (value = "CHARNODBLQUOTE"), + Name (value = "ESCAPEDCHAR"), + ], + optional = 1, repeating = 1, + ), + Literal (value ='"'), + ], + ) + ], + ), + ], + ) +) + +SPGenerator.addDefinition ( + "range", # '[',CHARBRACE?,CHARDASH?, (CHARRANGE/CHARNOBRACE)*, CHARDASH?,']' + SequentialGroup ( + children =[ + Literal (value ="["), + Name (value ="CHARBRACE",optional = 1), + Name (value ="CHARDASH",optional = 1), + FirstOfGroup( + children = [ + Name (value ="CHARRANGE"), + Name (value ="CHARNOBRACE"), + ], + optional = 1, repeating = 1, + ), + Name (value ="CHARDASH",optional = 1), + Literal (value ="]"), + ], + ) +) +SPGenerator.addDefinition ( + "CHARBRACE", + Literal (value = "]"), +) +SPGenerator.addDefinition ( + "CHARDASH", + Literal (value = "-"), +) +SPGenerator.addDefinition ( + "CHARRANGE", # CHARRANGE := CHARNOBRACE, '-', CHARNOBRACE + SequentialGroup ( + children =[ + Name (value ="CHARNOBRACE"), + Literal (value ="-"), + Name (value ="CHARNOBRACE"), + ], + ), +) +SPGenerator.addDefinition ( + "CHARNOBRACE", # CHARRANGE := CHARNOBRACE, '-', CHARNOBRACE + FirstOfGroup( + children =[ + Name (value ="ESCAPEDCHAR"), + Name (value ="CHAR"), + ], + ), +) +SPGenerator.addDefinition ( + "CHAR", + Literal ( + value ="]", + negative = 1, + ), +) + +SPGenerator.addDefinition ( + "ESCAPEDCHAR", # '\\',( SPECIALESCAPEDCHAR / ('x',HEXESCAPEDCHAR) / UNICODEESCAPEDCHAR_16 / OCTALESCAPEDCHAR / ) + SequentialGroup ( + children =[ 
+ Literal (value ="\\"), + FirstOfGroup( + children = [ + Name (value ="SPECIALESCAPEDCHAR"), + SequentialGroup( + children = [ + Range( value = 'xX' ), + Name( value="HEXESCAPEDCHAR"), + ] + ), + Name (value ="OCTALESCAPEDCHAR"), + SequentialGroup( + children = [ + Range( value='uU'), + Name( value='UNICODEESCAPEDCHAR' ), + ], + ), + ], + ), + ], + ) +) + +SPGenerator.addDefinition ( + "SPECIALESCAPEDCHAR", + Range(value ='\\abfnrtv"\''), +) + +SPGenerator.addDefinition ( + "OCTALESCAPEDCHAR", # [0-7],[0-7]?,[0-7]? + SequentialGroup ( + children =[ + Range (value ="01234567"), + Range (value ="01234567", optional = 1), + Range (value ="01234567", optional = 1), + ], + ) +) +SPGenerator.addDefinition ( + "HEXESCAPEDCHAR", # [0-9a-fA-F],[0-9a-fA-F] + SequentialGroup ( + children =[ + Range (value ="0123456789abcdefABCDEF"), + Range (value ="0123456789abcdefABCDEF"), + ], + ) +) +SPGenerator.addDefinition( + "UNICODEESCAPEDCHAR", + SequentialGroup( + children=[ + Range (value ="0123456789abcdefABCDEF"), + Range (value ="0123456789abcdefABCDEF"), + Range (value ="0123456789abcdefABCDEF"), + Range (value ="0123456789abcdefABCDEF"), + SequentialGroup( + children = [ + Range (value ="0123456789abcdefABCDEF"), + Range (value ="0123456789abcdefABCDEF"), + Range (value ="0123456789abcdefABCDEF"), + Range (value ="0123456789abcdefABCDEF"), + ], + optional = True, + ) + ] + ) +) + +SPGenerator.addDefinition ( + "CHARNODBLQUOTE", + Range(value ='\\"', negative = 1, repeating = 1), +) +SPGenerator.addDefinition ( + "CHARNOSNGLQUOTE", + Range(value ="\\'", negative = 1, repeating = 1), +) + +declaration = r"""declarationset := declaration+ +declaration := ts, (unreportedname/expandedname/name) ,ts,':',':'?,'=',seq_group + +element_token := lookahead_indicator?, ts, negpos_indicator?,ts, (literal/range/group/name),ts, occurence_indicator?, ts, error_on_fail? + +negpos_indicator := [-+] +lookahead_indicator := "?" +occurence_indicator := [+*?] +error_on_fail := "!", (ts,literal)? + +>group< := '(',seq_group, ')' +seq_group := ts,(error_on_fail/fo_group/element_token), + (ts, seq_indicator, ts, + (error_on_fail/fo_group/element_token) + )*, ts + +fo_group := element_token, (ts, fo_indicator, ts, element_token)+ + + +# following two are likely something peoples might want to +# replace in many instances... + := "/" + := ',' + +unreportedname := '<', name, '>' +expandedname := '>', name, '<' +name := [a-zA-Z_],[a-zA-Z0-9_]* + := ( [ \011-\015]+ / comment )* +comment := '#',-'\n'*,'\n' +literal := literalDecorator?,("'",(CHARNOSNGLQUOTE/ESCAPEDCHAR)*,"'") / ('"',(CHARNODBLQUOTE/ESCAPEDCHAR)*,'"') +literalDecorator := [c] + + + +range := '[',CHARBRACE?,CHARDASH?, (CHARRANGE/CHARNOBRACE)*, CHARDASH?,']' +CHARBRACE := ']' +CHARDASH := '-' +CHARRANGE := CHARNOBRACE, '-', CHARNOBRACE +CHARNOBRACE := ESCAPEDCHAR/CHAR +CHAR := -[]] +ESCAPEDCHAR := '\\',( SPECIALESCAPEDCHAR / ('x',HEXESCAPEDCHAR) / ([uU],UNICODEESCAPEDCHAR) / OCTALESCAPEDCHAR ) +SPECIALESCAPEDCHAR := [\\abfnrtv"'] +OCTALESCAPEDCHAR := [0-7],[0-7]?,[0-7]? +HEXESCAPEDCHAR := [0-9a-fA-F],[0-9a-fA-F] +CHARNODBLQUOTE := -[\\"]+ +CHARNOSNGLQUOTE := -[\\']+ +UNICODEESCAPEDCHAR := [0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F],([0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F])? +""" + +### Now the interpreter objects... +class Parser(baseparser.BaseParser): + """Parser which generates new parsers from EBNF grammars + + This parser class allows you to pass in an EBNF grammar as + the initialisation parameter. 
The EBNF is processed, and a + SimpleParse generator object is created as self.generator. + + Unlike most Parsers, this object is intended to be re-created + for each bit of data it parses (i.e. each EBNF), so it warps + the standard API a lot. + """ + _rootProduction = 'declarationset' + def __init__( self, ebnf, prebuilts=(), methodSource=None, definitionSources=() ): + """Create a new generator based on the EBNF in simpleparse format""" + processor = SPGrammarProcessor( prebuilts, definitionSources ) + success, tags, next = self.parse( ebnf, self._rootProduction, processor=processor ) + if next != len(ebnf): + lineNumber = lines(0, next, ebnf) + raise ValueError( + """Unable to complete parsing of the EBNF, stopped at line %s (%s chars of %s) +Unparsed:\n%s..."""%(lineNumber, next, len(ebnf), ebnf[next:next+100]) + ) + self.generator = processor.generator + def buildTagger( self, name=None, processor = None ): + """Build the tag-table for parsing the EBNF for this parser""" + return SPGenerator.buildParser( name, processor ) + +class SPGrammarProcessor( DispatchProcessor ): + """Processing object for post-processing an EBNF into a new generator""" + ### top level + def __init__( self, prebuilts=(), definitionSources=() ): + """Create a new generator based on the EBNF in simpleparse format""" + self.generator = generator.Generator() + for (name, table) in prebuilts: + if isinstance( table, ElementToken): + self.generator.addDefinition( name, table) + else: + self.generator.addDefinition( name, Prebuilt(value=table)) + for source in definitionSources: + self.generator.addDefinitionSource( source ) + + def declaration( self, info, buffer): + '''Base declaration from the grammar, a "production" or "rule"''' + (tag, left, right, sublist) = info + name = sublist[0] + expanded = 0 + if name[0] == "unreportedname": + name = name[3][0] + # note that the info is stored in the wrong place :( + report = 0 + elif name[0] == 'expandedname': + report = 1 + expanded = 1 + name = name[3][0] + else: + report = 1 + name = getString( name, buffer ) + self.currentProduction = name + content = dispatch( self, sublist[1], buffer ) + content.report = report + content.expanded = expanded + self.generator.addDefinition( + name, + content, + ) + del self.currentProduction + + ### element configuration + def element_token( self, info, buffer): + '''get the children, then configure''' + (tag, left, right, sublist) = info + base = None + negative = 0 + optional = 0 + repeating = 0 + lookahead = 0 + errorOnFail = None + for tup in sublist: + result = dispatch( self, tup, buffer ) + if tup[0] == 'negpos_indicator': + negative = result + elif tup[0] == 'occurence_indicator': + optional, repeating = result + elif tup[0] == 'lookahead_indicator': + lookahead = result + elif tup[0] == 'error_on_fail': + # we do some extra work here + errorOnFail = result + self._config_error_on_fail( errorOnFail, (tag,left,tup[1],[]), buffer ) + else: + base = result + base.optional = optional + base.negative = negative + base.repeating = repeating + base.lookahead = lookahead + if errorOnFail: + base.errorOnFail = errorOnFail + return base + + ### generator-node-builders + def seq_group( self, info, buffer): + """Process a sequential-group into a SequentialGroup element token""" + (tag, left, right, sublist) = info + children = dispatchList( self, sublist, buffer ) + errorOnFail = None + result = [] + for (item,tup) in zip(children,sublist): + if isinstance( item, ErrorOnFail ): + errorOnFail = item + else: + if errorOnFail: + 
item.errorOnFail = errorOnFail.copy() + self._config_error_on_fail( + item.errorOnFail, + tup, + buffer + ) + result.append( item ) + if len(result) == 1: + # single-item sequential group (very common) + return result[0] + elif not result: + raise ValueError( """SequentialGroup on line %s doesn't have an element-token child! grammar was %s"""%( lines(0,left, buffer), buffer[left:left+25])) + base = SequentialGroup( + children = result, + ) + return base + def fo_group( self, info, buffer): + """Process a first-of-group into a FirstOf element token""" + (tag, left, right, sublist) = info + children = dispatchList( self, sublist, buffer ) + if len(children) == 1: + # this should never happen, but if it does, we can deal with it I suppose... + return children[0] + base = FirstOfGroup( + children = children + ) + return base + + def literal( self, info, buffer): + '''Turn a literal result into a literal generator''' + (tag, left, right, sublist) = info + if sublist and sublist[0][0] == 'literalDecorator': + # right now only have the one decorator... + sublist = sublist[1:] + classObject = CILiteral + else: + classObject = Literal + elements = dispatchList( self, sublist, buffer) + ### Should check for CILiteral with non-CI string or single-character value! + return classObject( value = "".join(elements) ) + + def range( self, info, buffer): +## if hasattr( Range, 'requiresExpandedSet') and Range.requiresExpandedSet: + (tag, left, right, sublist) = info + return Range( + value = ''.join(dispatchList( self, sublist, buffer)), + ) +## else: +## # need to build up a new-syntax version of the range... +## # escape ^ to \^ +## # escape \ to \\ +## # escape - to \- +## # make sure range-sets are in proper order... +## raise NotImplementedError( """Haven't got the new CharSet version implemented yet""") + def name( self, tup, buffer): + return Name( + value = getString(tup, buffer), + ) + ### simple translators + occurenceIndicatorMap = { + '*': (1,1), + '+': (0,1), + '?': (1,0), + } + def occurence_indicator( self, tup, buffer): + '''Return optional, repeating as a tuple of true/false values''' + value = getString(tup, buffer) + return self.occurenceIndicatorMap[value] + def lookahead_indicator( self, tup, buffer ): + """If present, the lookahead indictor just says "yes", so just return 1""" + return 1 + def error_on_fail( self, info, buffer ): + """If present, we are going to make the current object an errorOnFail type, + + If there's a string literal child, then we use it to create the + "message" attribute of the errorOnFail object. + """ + (tag,left,right,children) = info + err = ErrorOnFail() + if children: + (tag,left,right,children) = children[0] + message = "".join(dispatchList( self, children, buffer)) + err.message = message + return err + def _config_error_on_fail( self, errorOnFail, tup, buffer ): + """Configure an error-on-fail instance for a given child tuple""" + # what we expected to find... 
+ errorOnFail.expected = buffer[tup[1]:tup[2]] + if hasattr( self, "currentProduction"): + errorOnFail.production = self.currentProduction + + + negposIndicatorMap = { + '+': 0, + '-': 1, + } + def negpos_indicator( self, tup, buffer ): + '''return whether indicates negative''' + value = getString(tup, buffer) + return self.negposIndicatorMap[value] + + def CHARNODBLQUOTE( self, tup, buffer): + return getString(tup, buffer) + CHAR = CHARNOSNGLQUOTE = CHARNODBLQUOTE + def ESCAPEDCHAR( self, info, buffer): + (tag, left, right, sublist) = info + return "".join(dispatchList( self, sublist, buffer)) + specialescapedmap = { + 'a':'\a', + 'b':'\b', + 'f':'\f', + 'n':'\n', + 'r':'\r', + 't':'\t', + 'v':'\v', + '\\':'\\', + '"':'"', + "'":"'", + } + def SPECIALESCAPEDCHAR( self, tup, buffer): + return self.specialescapedmap[ getString(tup, buffer)] + def OCTALESCAPEDCHAR(self, tup, buffer): + return chr(int( getString(tup, buffer), 8 )) + def HEXESCAPEDCHAR( self, tup , buffer): + return chr(int( getString(tup, buffer), 16 )) + def CHARNOBRACE( self, info, buffer): + (tag, left, right, sublist) = info + return "".join(dispatchList( self, sublist, buffer)) + def CHARRANGE( self, info, buffer): + '''Create a string from first to second item''' + (tag, left, right, sublist) = info + first,second = dispatchList( self, sublist, buffer) + if second < first: + second, first = first, second + if isinstance( first, _unicode ) or isinstance( second, _unicode ): + _chr = _unichr + if not (isinstance( second, _unicode ) and isinstance( first, _unicode )): + raise ValueError( 'Range %s uses one unicode and one string escape, cannot mix'%(buffer[left:right]) ) + else: + _chr = chr + first, second = list(map( ord, (first,second) )) + return u''.join([_chr(u) for u in range(first,second+1)]) + def CHARDASH( self, tup , buffer): + return '-' + def CHARBRACE( self, tup , buffer): + return ']' + + def UNICODEESCAPEDCHAR( self, info, buffer): + """Decode a unicode-escaped hex character into a character value""" + (tag, left, right, sublist) = info + char = _unichr(int( buffer[left:right], 16 )) + return char diff -Nru simpleparse-2.1.0a1/simpleparse/stt/COPYRIGHT simpleparse-2.2.0/simpleparse/stt/COPYRIGHT --- simpleparse-2.1.0a1/simpleparse/stt/COPYRIGHT 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/stt/COPYRIGHT 2012-10-17 20:54:31.000000000 +0000 @@ -0,0 +1,11 @@ + --------------------------------------------------------------------- + COPYRIGHT NOTICE + --------------------------------------------------------------------- + Copyright (c) 1997-2000, Marc-Andre Lemburg; mailto:mal@lemburg.com + Copyright (c) 2000-2001, eGenix.com Software GmbH; mailto:info@egenix.com + Copyright (c) 2003-2006, Mike Fletcher; mailto:mcfletch@vrplumber.com + + All Rights Reserved. + + This software is protected by copyright and other intellectual + property laws and by international treaties. diff -Nru simpleparse-2.1.0a1/simpleparse/stt/Doc/eGenix-mx-Extensions.html simpleparse-2.2.0/simpleparse/stt/Doc/eGenix-mx-Extensions.html --- simpleparse-2.1.0a1/simpleparse/stt/Doc/eGenix-mx-Extensions.html 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/stt/Doc/eGenix-mx-Extensions.html 2012-10-17 20:54:31.000000000 +0000 @@ -0,0 +1,1708 @@ + + + + eGenix.com mx Extensions for Python + + + + + + +
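The grammar and processor classes above are what bootstrap the EBNF declaration syntax. As a rough, illustrative sketch only (not part of this diff), a grammar written in that declaration syntax is normally compiled and used through the package's Parser wrapper; the production names and sample text below are invented for the example, and the simpleparse.parser import path is assumed from the rest of the package:

    # Minimal usage sketch (assumption: the standard simpleparse.parser.Parser
    # wrapper is available); the grammar and the "items" root production are
    # made up for illustration.
    from simpleparse.parser import Parser

    decl = r'''
    items    := (item, [ \t]*)+
    item     := word / number
    word     := [a-zA-Z]+
    number   := [0-9]+
    '''

    parser = Parser(decl, "items")
    success, children, nextpos = parser.parse("hello 42 world")
    # children is a list of (tagname, start, stop, subtags) result tuples
    # for the productions matched under the root; nextpos is the index up
    # to which the input was consumed.

The declaration string itself is parsed by the Parser class defined above (root production "declarationset"), and SPGrammarProcessor turns the resulting tag tree into objectgenerator element tokens.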
    +

    eGenix.com mx Extensions for Python

    + +
    + + + + +
    + + BASE package
        ( + mxDateTime : + mxTextTools : + mxStack : + mxTools : + mxProxy : + mxURL : + mxUID : + History : + Download + )
    + COMMERCIAL package
        ( + mxODBC : + History : + Buy Licenses : + Special Offer : + Download + )
    + EXPERIMENTAL package
        ( + mxNumber : + mxTidy : + History : + Download + )
    + Commercial Support : + Home +
    +
    + +     + +
    +
    + + + +

    Introduction

    + +
      + +

      + The eGenix.com mx + Extensions for Python are a collection of professional + quality Python software tools which enhance Python's usability in many + important areas such as ODBC database connectivity, fast + text processing, date/time processing and web site + programming. + +

      + The tools have a proven record of being portable across many + Unix and Windows platforms, e.g. you can write applications + which use an ODBC database on Windows and then run on Unix + platforms without change, thanks to the consistent, platform- + independent interfaces. + +

      + All of the available packages have shown their stability + and usefulness in many mission critical applications and + various commercial settings all around the world. + +

      + The two most well-known packages from the mx Extension + Series are mxDateTime and mxODBC providing date/time services and + professional ODBC database connectivity on practically all + supported Python platforms. These two packages enable + database software which is portable not only across + platforms, but also across database backends. + +

    + + + +

    Overview

    + +
    + + + +
    + +

    Packages

    + +
      + +

      + The following subpackages are included in the eGenix.com mx + Extension series, each providing fast and efficient + implementations for various application domains. All + subpackages live in the mx top-level Python package + to avoid naming collisions with other Python software. + +

      + + + + + + + + + + + + + + + + + + + + + + +
      + +

      + eGenix.com mx BASE Package: + +

      + +
        + + mxDateTime - Generic Date/Time Datatypes
        + mxTextTools - Fast Text Processing Tools
        + mxStack - Fast and Memory-Efficient Stack Datatype
        + mxTools - Collection of Additional Builtins
        + mxProxy - Generic Object Proxy & Weak Reference Datatype
        + mxBeeBase - On-disk B+Tree Database Construction Kit
        + mxURL - Efficient Storage and Management of URL/URI Information
        + mxUID - Create and Manage Unique IDs
        +
        + >>> Download
        +
        +
      + +
      + +

      + eGenix.com mx COMMERCIAL Package: + +

      + +
        + + mxODBC - Python DB-API compatible ODBC + 2.0 - 3.5 database interface;
        + supports Python 1.5.2 and Unicode for Python 2.0 and later
        +
        + >>> Download and + Buy Licenses
        +
        +
      + +
      + +

      + eGenix.com mx EXPERIMENTAL Package: + +

      + +
        + + mxNumber - Interface to GNU MP's High Precision Numerics
        + mxTidy - Interface to a library version of HTML Tidy
        +
        + >>> Download
        +
        +
      + +
      + +

      + +

    + +
    + +
    + +

    eGenix.com mx Extensions - BASE Package

    + +
    + +   Version 2.0.3   + +
    + +
    + +

    + + + +


    + +

    eGenix.com mx Extensions - COMMERCIAL Package

    + +
    + +   Version 2.0.4   + +
    + +
    + +

    + + + +


    + +

    eGenix.com mx Extensions - EXPERIMENTAL Package

    + +
    + +   Version 0.7.0   + +
    + +
    + +

    + + + +


    + +

    Commercial Support

    + +
    + + + +
    +
    + + © 1997-2000, Copyright by Marc-André Lemburg; All + Rights Reserved. mailto: mal@lemburg.com +
    + © 2000-2001, Copyright by eGenix.com Software GmbH, + Langenfeld; All Rights Reserved. mailto: info@egenix.com +
    + Trademarks: "mx Extensions" is a trademark of Marc-Andre + Lemburg and eGenix.com GmbH. + +
    + + + diff -Nru simpleparse-2.1.0a1/simpleparse/stt/Doc/mxLicense.html simpleparse-2.2.0/simpleparse/stt/Doc/mxLicense.html --- simpleparse-2.1.0a1/simpleparse/stt/Doc/mxLicense.html 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/stt/Doc/mxLicense.html 2012-10-17 20:54:31.000000000 +0000 @@ -0,0 +1,868 @@ + + + + mx Extension Series - License Information + + + + + +
    +

    mx Extension Series - License Information

    + +
    + + + + +
    + + Public License : + Commercial License : + Home + + + + Version 1.1.0 + +
    +
    + +

    Introduction

    + +
      + +

      + The mx Extensions Series packages are brought to you by the + eGenix.com Software, Skills and Services GmbH, Langenfeld, + Germany. We are licensing our products under the following + two different licenses: +

      + +

      + The Public License is very similar to the Python 2.0 + license and covers the open source software made available + by eGenix.com which is free of charge even for commercial + use. + +

      + The Commercial License is intended for covering + commercial eGenix.com software, notably the mxODBC + package. Only private and non-commercial use is free of + charge. + +

      + If you have questions regarding these licenses, please + contact Licenses@eGenix.com. + If you would like to bundle the software with your + commercial product, please write to Sales@eGenix.com + for more information about the redistribution conditions and + terms. + +

    + + + +

    eGenix.com Public License

    + +
      + +

      + The eGenix.com Public License is similar to the Python 2.0 + license and is considered an Open Source license (in the sense defined + by the Open Source + Initiative (OSI)) by eGenix.com. + +

      + The license should also be compatible with the GNU Public License, in case + that matters. The only part which is known to have caused + some problems with Richard Stallman in the past is the + choice-of-law clause. + +

      +

      + + + + +
      +
      +________________________________________________________________________
      +
      +EGENIX.COM PUBLIC LICENSE AGREEMENT                        VERSION 1.1.0
      +________________________________________________________________________
      +
      +1.  Introduction
      +
      +    This "License Agreement" is between eGenix.com Software, Skills
      +    and Services GmbH ("eGenix.com"), having an office at
      +    Pastor-Loeh-Str. 48, D-40764 Langenfeld, Germany, and the
      +    Individual or Organization ("Licensee") accessing and otherwise
      +    using this software in source or binary form and its associated
      +    documentation ("the Software").
      +
      +2.  License 
      +
      +    Subject to the terms and conditions of this eGenix.com Public
      +    License Agreement, eGenix.com hereby grants Licensee a
      +    non-exclusive, royalty-free, world-wide license to reproduce,
      +    analyze, test, perform and/or display publicly, prepare derivative
      +    works, distribute, and otherwise use the Software alone or in any
      +    derivative version, provided, however, that the eGenix.com Public
      +    License Agreement is retained in the Software, or in any
      +    derivative version of the Software prepared by Licensee.
      +
      +3.  NO WARRANTY
      +
      +    eGenix.com is making the Software available to Licensee on an "AS
      +    IS" basis.  SUBJECT TO ANY STATUTORY WARRANTIES WHICH CAN NOT BE
      +    EXCLUDED, EGENIX.COM MAKES NO REPRESENTATIONS OR WARRANTIES,
      +    EXPRESS OR IMPLIED.  BY WAY OF EXAMPLE, BUT NOT LIMITATION,
      +    EGENIX.COM MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY
      +    OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT
      +    THE USE OF THE SOFTWARE WILL NOT INFRINGE ANY THIRD PARTY RIGHTS.
      +
      +4.  LIMITATION OF LIABILITY
      +
      +    EGENIX.COM SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF
      +    THE SOFTWARE FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES
      +    OR LOSS (INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF
      +    BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS
      +    INFORMATION, OR OTHER PECUNIARY LOSS) AS A RESULT OF USING,
      +    MODIFYING OR DISTRIBUTING THE SOFTWARE, OR ANY DERIVATIVE THEREOF,
      +    EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
      +
      +    SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION OR LIMITATION OF
      +    INCIDENTAL OR CONSEQUENTIAL DAMAGES, SO THE ABOVE EXCLUSION OR
      +    LIMITATION MAY NOT APPLY TO LICENSEE.
      +
      +5.  Termination
      +
      +    This License Agreement will automatically terminate upon a
      +    material breach of its terms and conditions.
      +
      +6.  Third Party Rights 
      +
      +    Any software or documentation in source or binary form provided
      +    along with the Software that is associated with a separate license
      +    agreement is licensed to Licensee under the terms of that license
      +    agreement. This License Agreement does not apply to those portions
      +    of the Software. Copies of the third party licenses are included
      +    in the Software Distribution.
      +
      +7.  General
      +
      +    Nothing in this License Agreement affects any statutory rights of
      +    consumers that cannot be waived or limited by contract.
      +
      +    Nothing in this License Agreement shall be deemed to create any
      +    relationship of agency, partnership, or joint venture between
      +    eGenix.com and Licensee.
      +
      +    If any provision of this License Agreement shall be unlawful,
      +    void, or for any reason unenforceable, such provision shall be
      +    modified to the extent necessary to render it enforceable without
      +    losing its intent, or, if no such modification is possible, be
      +    severed from this License Agreement and shall not affect the
      +    validity and enforceability of the remaining provisions of this
      +    License Agreement.
      +
      +    This License Agreement shall be governed by and interpreted in all
      +    respects by the law of Germany, excluding conflict of law
      +    provisions. It shall not be governed by the United Nations
      +    Convention on Contracts for International Sale of Goods.
      +
      +    This License Agreement does not grant permission to use eGenix.com
      +    trademarks or trade names in a trademark sense to endorse or
      +    promote products or services of Licensee, or any third party.
      +
      +    The controlling language of this License Agreement is English. If
      +    Licensee has received a translation into another language, it has
      +    been provided for Licensee's convenience only.
      +
      +8.  Agreement
      +
      +    By downloading, copying, installing or otherwise using the
      +    Software, Licensee agrees to be bound by the terms and conditions
      +    of this License Agreement.
      +
      +
      +    For questions regarding this License Agreement, please write to:
      +
      +	      eGenix.com Software, Skills and Services GmbH
      +	      Pastor-Loeh-Str. 48
      +	      D-40764 Langenfeld
      +	      Germany
      +		    
      +
      +
      + +

      + +
    + + + +

    eGenix.com Commercial License

    + +
      + +

      + The eGenix.com Commercial License covers commercial + eGenix.com software, notably the mxODBC package. Only + private and non-commercial use is free of charge. + +

      + Usage of the software in commercial settings such as for + implementing in-house applications in or for companies, + governments, for-profit organizations, etc. requires a + signed "Proof of Authorization" which can be bought from + eGenix.com in order to authorize this use. + +

      eGenix.com Commercial Licensing Models

      + +

      + We currently offer four models to choose from: +

        + +
      1. CPU License: per-installation licenses (both for + commercial and non-commercial use)

      2. Developer CPU License: per-developer-seat + licenses which allow redistribution

      3. Reseller Agreement: agreement which allows + reselling the software to third parties for standalone use +

      4. Product Reseller Agreement: agreement which + allows reselling the software to third parties for use in a + specific product only
      + +

      + The first two options are covered by the eGenix.com + Commercial License through the "Proof of Authorization" + forms we provide below. The two reseller options have to be + negotiated between the reseller and eGenix.com. Please + contact sales@eGenix.com if you + are interested in becoming an eGenix.com software reseller. + +

      + +

      +

      + + + + +
      +
      +________________________________________________________________________
      +
      +EGENIX.COM COMMERCIAL LICENSE AGREEMENT                    VERSION 1.1.0
      +________________________________________________________________________
      +
      +1.  Introduction
      +
      +    This "License Agreement" is between eGenix.com Software, Skills
      +    and Services GmbH ("eGenix.com"), having an office at
      +    Pastor-Loeh-Str. 48, D-40764 Langenfeld, Germany, and the
      +    Individual or Organization ("Licensee") accessing and otherwise
      +    using this software in source or binary form and its associated
      +    documentation ("the Software").
      +
      +2.  Terms and Definitions
      +
      +    The "Software" covered under this License Agreement includes
      +    without limitation, all object code, source code, help files,
      +    publications, documentation and other programs, products or tools
      +    that are included in the official "Software Distribution"
      +    available from eGenix.com.
      +
      +    The "Proof of Authorization" for the Software is a written and
      +    signed notice from eGenix.com providing evidence of the extent of
      +    authorizations the Licensee has acquired to use the Software and
      +    of Licensee's eligibility for future upgrade program prices (if
      +    announced) and potential special or promotional opportunities. As
      +    such, the Proof of Authorization becomes part of this License
      +    Agreement.
      +
      +    Installation of the Software ("Installation") refers to the
      +    process of unpacking or copying the files included in the Software
      +    Distribution to an Installation Target.
      +
      +    "Installation Target" refers to the target of an installation
      +    operation.  Targets are defined as follows:
      +
      +	1) "CPU" refers to a central processing unit which is able to
      +	store and/or execute the Software (a server, personal
      +    	computer, or other computer-like device) using at most two (2)
      +    	processors,
      +
      +	2) "Site" refers to at most one hundred fifty (150) CPUs
      +    	installed at a single site of a company,
      +
      +	3) "Corporate" refers to at most one thousand (1000) CPUs
      +	installed at an unlimited number of sites of the company,
      +
      +	4) "Developer CPU" refers to a single CPU used by at most one (1)
      +    	developer.
      +
      +    When installing the Software on a server CPU for use by other CPUs
      +    in a network, Licensee must obtain a License for the server CPU
      +    and for all client CPUs attached to the network which will make
      +    use of the Software by copying the Software in binary or source
      +    form from the server into their CPU memory. If a CPU makes use of
      +    more than two (2) processors, Licensee must obtain additional CPU
      +    licenses to cover the total number of installed
      +    processors. Likewise, if a Developer CPU is used by more than one
      +    developer, Licensee must obtain additional Developer CPU licenses
      +    to cover the total number of developers using the CPU.
      +
      +    "Commercial Environment" refers to any application environment
      +    which is aimed at directly or indirectly generating profit. This
      +    includes, without limitation, for-profit organizations,
      +    governments, private educational institutions, work as independent
      +    contractor, consultant and other profit generating relationships
      +    with organizations or individuals.
      +
      +    "Non-Commercial Environments" are all those application
      +    environments which do not directly or indirectly generate profit.
      +    Public educational institutions and officially acknowledged
      +    non-profit organizations are regarded as being a Non-Commercial
      +    Environments in the aforementioned sense.
      +
      +3.  License Grant
      +
      +    Subject to the terms and conditions of this License Agreement,
      +    eGenix.com hereby grants Licensee a non-exclusive, world-wide
      +    license to
      +
      +	1) use the Software to the extent of authorizations Licensee has
      +	acquired and
      +
      +	2) distribute, make and install copies to support the level of use
      +	authorized, providing Licensee reproduces this License Agreement
      +	and any other legends of ownership on each copy, or partial copy,
      +	of the Software.
      +
      +    If Licensee acquires this Software as a program upgrade,
      +    Licensee's authorization to use the Software from which Licensee
      +    upgraded is terminated.
      +
      +    Licensee will ensure that anyone who uses the Software does so
      +    only in compliance with the terms of this License Agreement.
      +
      +    Licensee may not 
      +
      +	1) use, copy, install, compile, modify, or distribute the
      +    	Software except as provided in this License Agreement;
      +
      +	2) reverse assemble, reverse engineer, reverse compile, or
      +	otherwise translate the Software except as specifically
      +    	permitted by law without the possibility of contractual
      +    	waiver; or
      +
      +	3) rent, sublicense or lease the Software.
      +
      +4.  Authorizations
      +
      +    The extent of authorization depends on the ownership of a Proof of
      +    Authorization for the Software.
      +
      +    Usage of the Software for any other purpose not explicitly covered
      +    by this License Agreement or granted by the Proof of Authorization
      +    is not permitted and requires the written prior permission from
      +    eGenix.com.
      +
      +5.  Modifications
      +
      +    Software modifications may only be distributed in form of patches
      +    to the original files contained in the Software Distribution.
      +
      +    The patches must be accompanied by a legend of origin and
      +    ownership and a visible message stating that the patches are not
      +    original Software delivered by eGenix.com, nor that eGenix.com can
      +    be held liable for possible damages related directly or indirectly
      +    to the patches if they are applied to the Software.
      +
      +6.  Experimental Code or Features
      +
      +    The Software may include components containing experimental code
      +    or features which may be modified substantially before becoming
      +    generally available.
      +
      +    These experimental components or features may not be at the level
      +    of performance or compatibility of generally available eGenix.com
      +    products. eGenix.com does not guarantee that any of the
      +    experimental components or features contained in the eGenix.com
      +    will ever be made generally available.
      +
      +7.  Expiration and License Control Devices
      +
      +    Components of the Software may contain disabling or license
      +    control devices that will prevent them from being used after the
      +    expiration of a period of time or on Installation Targets for
      +    which no license was obtained.
      +
      +    Licensee will not tamper with these disabling devices or the
      +    components. Licensee will take precautions to avoid any loss of
      +    data that might result when the components can no longer be used.
      +
      +8.  NO WARRANTY
      +
      +    eGenix.com is making the Software available to Licensee on an "AS
      +    IS" basis. SUBJECT TO ANY STATUTORY WARRANTIES WHICH CAN NOT BE
      +    EXCLUDED, EGENIX.COM MAKES NO REPRESENTATIONS OR WARRANTIES,
      +    EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION,
      +    EGENIX.COM MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY
      +    OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT
      +    THE USE OF THE SOFTWARE WILL NOT INFRINGE ANY THIRD PARTY RIGHTS.
      +
      +9.  LIMITATION OF LIABILITY
      +
      +    TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, IN NO EVENT
      +    SHALL EGENIX.COM BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE
      +    SOFTWARE FOR (I) ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES
      +    OR LOSS (INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF
      +    BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS
      +    INFORMATION, OR OTHER PECUNIARY LOSS) AS A RESULT OF USING,
      +    MODIFYING OR DISTRIBUTING THE SOFTWARE, OR ANY DERIVATIVE THEREOF,
      +    EVEN IF ADVISED OF THE POSSIBILITY THEREOF; OR (II) ANY AMOUNTS IN
      +    EXCESS OF THE AGGREGATE AMOUNTS PAID TO EGENIX.COM UNDER THIS
      +    LICENSE AGREEMENT DURING THE TWELVE (12) MONTH PERIOD PRECEDING
      +    THE DATE THE CAUSE OF ACTION AROSE.
      +
      +    SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION OR LIMITATION OF
      +    INCIDENTAL OR CONSEQUENTIAL DAMAGES, SO THE ABOVE EXCLUSION OR
      +    LIMITATION MAY NOT APPLY TO LICENSEE.
      +
      +10. Termination
      +
      +    This License Agreement will automatically terminate upon a
      +    material breach of its terms and conditions if not cured within
      +    thirty (30) days of written notice by eGenix.com. Upon
      +    termination, Licensee shall discontinue use and remove all
      +    installed copies of the Software.
      +
      +11. Indemnification 
      +
      +    Licensee hereby agrees to indemnify eGenix.com against and hold
      +    harmless eGenix.com from any claims, lawsuits or other losses that
      +    arise out of Licensee's breach of any provision of this License
      +    Agreement.
      +
      +12. Third Party Rights 
      +
      +    Any software or documentation in source or binary form provided
      +    along with the Software that is associated with a separate license
      +    agreement is licensed to Licensee under the terms of that license
      +    agreement. This License Agreement does not apply to those portions
      +    of the Software. Copies of the third party licenses are included
      +    in the Software Distribution.
      +
      +13. High Risk Activities 
      +
      +    The Software is not fault-tolerant and is not designed,
      +    manufactured or intended for use or resale as on-line control
      +    equipment in hazardous environments requiring fail-safe
      +    performance, such as in the operation of nuclear facilities,
      +    aircraft navigation or communication systems, air traffic control,
      +    direct life support machines, or weapons systems, in which the
      +    failure of the Software, or any software, tool, process, or
      +    service that was developed using the Software, could lead directly
      +    to death, personal injury, or severe physical or environmental
      +    damage ("High Risk Activities").
      +
      +    Accordingly, eGenix.com specifically disclaims any express or
      +    implied warranty of fitness for High Risk Activities.
      +
      +    Licensee agrees that eGenix.com will not be liable for any claims
      +    or damages arising from the use of the Software, or any software,
      +    tool, process, or service that was developed using the Software,
      +    in such applications.
      +
      +14. General
      +
      +    Nothing in this License Agreement affects any statutory rights of
      +    consumers that cannot be waived or limited by contract.
      +
      +    Nothing in this License Agreement shall be deemed to create any
      +    relationship of agency, partnership, or joint venture between
      +    eGenix.com and Licensee.
      +
      +    If any provision of this License Agreement shall be unlawful,
      +    void, or for any reason unenforceable, such provision shall be
      +    modified to the extent necessary to render it enforceable without
      +    losing its intent, or, if no such modification is possible, be
      +    severed from this License Agreement and shall not affect the
      +    validity and enforceability of the remaining provisions of this
      +    License Agreement.
      +
      +    This License Agreement shall be governed by and interpreted in all
      +    respects by the law of Germany, excluding conflict of law
      +    provisions. It shall not be governed by the United Nations
      +    Convention on Contracts for International Sale of Goods.
      +
      +    This License Agreement does not grant permission to use eGenix.com
      +    trademarks or trade names in a trademark sense to endorse or
      +    promote products or services of Licensee, or any third party.
      +
      +    The controlling language of this License Agreement is English. If
      +    Licensee has received a translation into another language, it has
      +    been provided for Licensee's convenience only.
      +
      +15. Agreement
      +
      +    By downloading, copying, installing or otherwise using the
      +    Software, Licensee agrees to be bound by the terms and conditions
      +    of this License Agreement.
      +
      +
      +    For questions regarding this License Agreement, please write to:
      +
      +	      eGenix.com Software, Skills and Services GmbH
      +	      Pastor-Loeh-Str. 48
      +	      D-40764 Langenfeld
      +	      Germany
      +		    
      +
      +
      + +

      + If you have questions, please send e-mail to licenses@egenix.com or + use the above postal address. + +

      + +

      eGenix.com Proof of Authorization (1 CPU License)

      + +

      + This is the "Proof of Authorization" we send out for a "1 + CPU License" subject to the above license. It permits you to + install and use the Software on one machine having at most 2 + processors. + +

      +

      + + + + +
      +
      +________________________________________________________________________
      +
      +EGENIX.COM PROOF OF AUTHORIZATION:                         1 CPU License
      +________________________________________________________________________
      +
      +1.  License Grant
      +
      +    eGenix.com Software, Skills and Services GmbH ("eGenix.com"),
      +    having an office at Pastor-Loeh-Str. 48, D-40764 Langenfeld,
      +    Germany, hereby grants the Individual or Organization ("Licensee")
      +
      +       Licensee:  xxxxxx
      +
      +    a non-exclusive, world-wide license to use the software listed
      +    below in source or binary form and its associated documentation
      +    ("the Software") under the terms and conditions of this License
      +    Agreement and to the extent authorized by this Proof of
      +    Authorization.
      +
      +2.  Covered Software
      +
      +       Software Name:              mxODBC Python ODBC Interface
      +       Software Version:           2.1
      +				   (including all patch level releases)
      +       Software Distribution:      As officially made available by 
      +				   eGenix.com on http://www.egenix.com/
      +       Operating System:           any compatible operating system
      +
      +3.  Authorizations
      +
      +    eGenix.com hereby authorizes Licensee to copy, install, compile,
      +    modify and use the Software on the following Installation Targets
      +    under the terms of this License Agreement.
      +
      +       Installation Targets:       one (1) CPU
      +
      +    Use of the Software for any other purpose or redistribution IS NOT
      +    PERMITTED BY THIS PROOF OF AUTHORIZATION.
      +
      +4.  Proof
      +
      +    This Proof of Authorization was issued by
      +
      +	Marc-Andre Lemburg, CEO eGenix.com
      +	Langenfeld, xxxx-xx-xx
      +
      +	Proof of Authorization Key:
      +	xxxx-xxxx-xxxx-xxxx-xxxx-xxxx
      +		    
      +
      +
      + +

      + When you buy CPU licenses, you will receive a digitally + signed "Proof of Authorization" by e-mail. + +

      + The PGP key used to sign these proofs is named "eGenix.com + Licenses <licenses@egenix.com>" and can be fetched + from any PGP key server, e.g. the OpenPGP Public Key + Server. The PGP key ID is 8C25C2A2; its fingerprint is + "2E1B D691 A231 E09B CEF5 C9D5 C792 13DD 8C25 C2A2". To + check the digital signature, use one of the PGP or + GPG programs available on the Internet. + +

      + +

      eGenix.com Proof of Authorization (1 Developer CPU License)

      + +

      + This is the "Proof of Authorization" we send out for a "1 + Developer CPU License" subject to the above license. It + allows you to redistribute the Software developed on the + developer machine under certain conditions and is targetted + at product developers wanting to use the Software in their + products. + +

      + Please contact sales@eGenix.com if you + have questions about the redistribution conditions or other + requirements. + +

      +

      + + + + +
      +
      +________________________________________________________________________
      +
      +EGENIX.COM PROOF OF AUTHORIZATION:               1 Developer CPU License
      +________________________________________________________________________
      +
      +1.  License Grant
      +
      +    eGenix.com Software, Skills and Services GmbH ("eGenix.com"),
      +    having an office at Pastor-Loeh-Str. 48, D-40764 Langenfeld,
      +    Germany, hereby grants the Individual or Organization ("Licensee")
      +
      +       Licensee:  xxxxxx
      +
      +    a non-exclusive, world-wide license to use the software listed
      +    below in source or binary form and its associated documentation
      +    ("the Software") under the terms and conditions of this License
      +    Agreement and to the extent authorized by this Proof of
      +    Authorization.
      +
      +2.  Covered Software
      +
      +       Software Name:              mxODBC Python ODBC Interface
      +       Software Version:           2.1
      +				   (including all patch level releases)
      +       Software Distribution:      As officially made available by 
      +				   eGenix.com on http://www.egenix.com/
      +       Operating System:           any compatible operating system
      +
      +3.  Authorizations
      +
      +3.1. Application Development
      +
      +    eGenix.com hereby authorizes Licensee to copy, install, compile,
      +    modify and use the Software on the following Developer
      +    Installation Targets for the purpose of developing products using
      +    the Software as integral part.
      +
      +       Developer Installation Targets: one (1) CPU
      +
      +3.2. Redistribution
      +
      +    eGenix.com hereby authorizes Licensee to redistribute the Software
      +    bundled with a product developed by Licensee on the Developer
      +    Installation Targets ("the Product") subject to the terms and
      +    conditions of this License Agreement for installation and use in
      +    combination with the Product on the following Redistribution
      +    Installation Targets, provided that:
      +
      +        1) Licensee shall not and shall not permit or assist any third
      +        party to sell or distribute the Software as a separate
      +        product;
      +
      +        2) Licensee shall not and shall not permit any third party to
      +
      +           (i) market, sell or distribute the Software to any end user
      +           except subject to the eGenix Commercial License Agreement,
      +
      +           (ii) rent, sell, lease or otherwise transfer the Software
      +           or any part thereof or use it for the benefit of any third
      +           party,
      +
      +           (iii) use the Software outside the Product or for any other
      +           purpose not expressly licensed hereunder;
      +
      +        3) the Product does not provide functions or capabilities
      +        similar to those of the Software itself, i.e. the Product does
      +        not introduce commercial competition for the Software as sold
      +        by eGenix.com.
      +
      +       Redistribution Installation Targets: any number of CPUs capable of
      +                                            running the Product and the
      +                                            Software
      +
      +4.  Proof
      +
      +    This Proof of Authorization was issued by
      +
      +	Marc-Andre Lemburg, CEO eGenix.com
      +	Langenfeld, xxxx-xx-xx
      +
      +	Proof of Authorization Key:
      +	xxxx-xxxx-xxxx-xxxx-xxxx-xxxx
      +		    
      +
      +
      + +

      + When you buy Developer CPU licenses, you will receive a + digitally signed "Proof of Authorization" by e-mail. + +

      + The PGP key used to sign these proofs is named "eGenix.com + Licenses <licenses@egenix.com>" and can be fetched + from any PGP key server, e.g. the OpenPGP Public Key + Server. The PGP key ID is 8C25C2A2; its fingerprint is + "2E1B D691 A231 E09B CEF5 C9D5 C792 13DD 8C25 C2A2". To + check the digital signature, use one of the PGP or + GPG programs available on the Internet. + +

      + If you have questions, please send e-mail to licenses@egenix.com or + use the above postal address. + +

      + +

      eGenix.com Proof of Authorization (Non-Commercial-Use 1 CPU License)

      + +

      + This is the "Proof of Authorization" we send out for a + "Non-Commercial-Use 1 CPU License" subject to the above + license agreement. It permits you to install and use the + Software on one machine having at most 2 processors in a + Non-Commercial Environment as defined in the license + agreement. + +

      + Please contact licenses@eGenix.com if + you have questions about the term "Non-Commercial + Environment" and whether this license covers your needs or + not. + +

      +

      + + + + +
      +
      +________________________________________________________________________
      +
      +EGENIX.COM PROOF OF AUTHORIZATION:      Non-Commercial-Use 1 CPU License
      +________________________________________________________________________
      +
      +1.  License Grant
      +
      +    eGenix.com Software, Skills and Services GmbH ("eGenix.com"),
      +    having an office at Pastor-Loeh-Str. 48, D-40764 Langenfeld,
      +    Germany, hereby grants the Individual or Organization ("Licensee")
      +
      +       Licensee:  xxxxxx
      +
      +    a non-exclusive, world-wide license to use the software listed
      +    below in source or binary form and its associated documentation
      +    ("the Software") under the terms and conditions of this License
      +    Agreement and to the extent authorized by this Proof of
      +    Authorization.
      +
      +2.  Covered Software
      +
      +       Software Name:              mxODBC Python ODBC Interface
      +       Software Version:           2.1
      +				   (including all patch level releases)
      +       Software Distribution:      As officially made available by 
      +				   eGenix.com on http://www.egenix.com/
      +       Operating System:           any compatible operating system
      +
      +3.  Authorizations
      +
      +    eGenix.com hereby authorizes Licensee to copy, install, compile,
      +    modify and use the Software on the following Installation Targets
      +    under the terms of this License Agreement IN NON-COMMERCIAL
      +    ENVIRONMENTS ONLY.
      +
      +       Installation Targets:       one (1) CPU
      +
      +    Use of the Software in a Commercial Environment or for any other
      +    purpose or redistribution IS NOT PERMITTED BY THIS PROOF OF
      +    AUTHORIZATION.
      +
      +4.  Proof
      +
      +    This Proof of Authorization was issued by
      +
      +	Marc-Andre Lemburg, CEO eGenix.com
      +	Langenfeld, xxxx-xx-xx
      +
      +	Proof of Authorization Key:
      +	xxxx-xxxx-xxxx-xxxx-xxxx-xxxx
      +		    
      +
      +
      + +

      + When you request Non-Commercial-Use CPU licenses, you will + receive a digitally signed "Proof of Authorization" by + e-mail. + +

      + The PGP key used to sign these proofs is named "eGenix.com + Licenses <licenses@egenix.com>" and can be fetched + from any PGP key server, e.g. the OpenPGP Public Key + Server. The PGP key ID is 8C25C2A2; its fingerprint is + "2E1B D691 A231 E09B CEF5 C9D5 C792 13DD 8C25 C2A2". To + check the digital signature, use one of the PGP or + GPG programs available on the Internet. + +

      + +

    + +
    +
    © 2000-2003, Copyright by eGenix.com + Software GmbH, Langenfeld, Germany; All Rights Reserved. mailto: + info@egenix.com +
    + + + diff -Nru simpleparse-2.1.0a1/simpleparse/stt/Doc/mxTextTools.html simpleparse-2.2.0/simpleparse/stt/Doc/mxTextTools.html --- simpleparse-2.1.0a1/simpleparse/stt/Doc/mxTextTools.html 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/stt/Doc/mxTextTools.html 2012-10-17 20:54:31.000000000 +0000 @@ -0,0 +1,2677 @@ + + + + TextTools - Fast Text Manipulation Tools for Python + + + + + + +
    + +

    mxTextTools - Fast Text Manipulation Tools for Python

    + +
    + + + + +
    + + Engine : + TextSearch Objects : + CharSet Objects : + Functions : + Constants : + Examples : + Structure : + Support : + Download : + Copyright & License : + History : + Home + + + + Version 2.1.0 + +
    +
    + +

    Introduction

    + +
      + +

      + mxTextTools is a collection of high-speed string + manipulation routines and new Python objects for dealing + with common text processing tasks. + +

      + One of the major features of this package is the integrated + Tagging Engine, which gives you the speed of compiled + C code while maintaining the portability of Python. The + Tagging Engine uses byte-code "programs" written in the form of + Python tuples. These programs are then translated into an + internal binary form which is processed by a very fast + virtual machine designed specifically for scanning text + data. + +

      + As a result, the Tagging Engine allows parsing text at + higher speeds than e.g. regular expression packages while + still maintaining the flexibility of programming the parser + in Python. Callbacks and user-defined matching functions + extend this approach far beyond what you could do with + other common text processing methods. + +
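      As a rough sketch of what such a tuple "program" looks like (assuming the tag() function and the AllIn command are importable from the bundled simpleparse.stt.TextTools.TextTools module, as elsewhere in this package; the tag names and sample text are invented):

          # Minimal sketch: a two-entry tag table that tags a run of digits
          # followed by a run of lowercase letters.  AllIn matches one or more
          # characters from the given set; an entry without a jump target makes
          # the whole table fail if it does not match.
          from simpleparse.stt.TextTools.TextTools import tag, AllIn

          table = (
              ('number', AllIn, '0123456789'),
              ('word',   AllIn, 'abcdefghijklmnopqrstuvwxyz'),
          )

          success, taglist, nextpos = tag('123abc', table)
          # success is 1 on a complete match; taglist holds
          # (tagobj, left, right, subtags) entries, here roughly
          # [('number', 0, 3, None), ('word', 3, 6, None)].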

      + Two other major features are the search and character set + objects provided by the package. Both are implemented in C + to give you maximum performance on all supported platforms. + +

      + A note about the word 'tagging': This originated from + what is done in HTML to mark some text with certain extra + information. The Tagging Engine extends this notion to + assigning Python objects to text substrings. Every substring + marked in this way carries a 'tag' (the object) which can be + used to do all kinds of useful things. + +

      + If you are looking for more tutorial-style documentation of + mxTextTools, there's a new book by David Mertz about Text Processing with + Python which covers mxTextTools and other text-oriented + tools at great length. + +

    + + + +

    Tagging Engine

    + +
    + + + +

    TextSearch Object

    + +
      + +

      + The TextSearch object is immutable and usable for one search + string per object only. However, once created, a + TextSearch object can be applied to as many text strings as + you like -- much like a compiled regular expression. Matching + is exact (with translations done on-the-fly if supported by + the search algorithm). + +

      + Furthermore, the TextSearch objects can be pickled and + implement the copy protocol as defined by the copy + module. Comparisons and hashing are not implemented (the + objects are stored by id in dictionaries). + +

      + Depending on the search algorithm, TextSearch objects can + search in 8-bit strings and/or Unicode. Searching in memory + buffers is currently not supported. Accordingly, the search + string itself may also be an 8-bit string or Unicode. + +

      TextSearch Object Constructors

      + +
        + +

        + In older versions of mxTextTools there were two separate + constructors for search objects: BMS() for + Boyer-Moore and FS() for the (unpublished) + FastSearch algorithm. With 2.1.0 the interface was + changed to merge these two constructors into one which takes + the algorithm type as a parameter. + +

        + Note: The FastSearch algorithm is *not* included + in the public release of mxTextTools. + +

        +

        +
        + TextSearch(match,translate=None,algorithm=default_algorithm) +
        + +
        + Create a TextSearch substring search object for the + string match implementing the algorithm specified in + the constructor. +

        + algorithm defines the algorithm to + use. Possible values are: +

        + +
        BOYERMOORE
        + +
        Enhanced Boyer-Moore-Horspool style algorithm + for searching in 8-bit text. Unicode is not + supported. On-the-fly translation is + supported.

        + +
        FASTSEARCH
        + +
        Enhanced Boyer-Moore style algorithm for + searching in 8-bit text. This algorithm provides + better performance for match patterns having + repeating sequences, like e.g. DNA strings. Unicode + is not supported. On-the-fly translation is + supported. +

        + Not included in the public release of + mxTextTools.

        + +
        TRIVIAL
        + +
        Trivial right-to-left search algorithm. This + algorithm can be used to search in 8-bit text and + Unicode. On-the-fly translation is not + supported.
        + +
        + +

        + algorithm defaults to BOYERMOORE (or + FASTSEARCH if available) for 8-bit match strings and + TRIVIAL for Unicode match strings. + +

        + translate is an optional + translate-string like the one used in the module + 're', i.e. a 256-character string mapping the + ordinals of the base character set to new + characters. It is supported by the BOYERMOORE and + the FASTSEARCH algorithms only. + +

        + This function supports keyword arguments. + +

        + +

        + BMS(match[,translate])
        + +
        + DEPRECATED: Use TextSearch(match, translate, + BOYERMOORE) instead. +

        + +

        + FS(match[,translate])
        + +
        + DEPRECATED: Use TextSearch(match, translate, + FASTSEARCH) instead. +

        + +

        +
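        + A minimal sketch of using the TextSearch() constructor described
        + above (assuming TextSearch and the algorithm constants are exported
        + by simpleparse.stt.TextTools as documented):

        +    from simpleparse.stt.TextTools import TextSearch, BOYERMOORE, TRIVIAL
        +
        +    # 8-bit match string: defaults to BOYERMOORE (or FASTSEARCH, if available)
        +    plain = TextSearch('needle')
        +    # the same, spelled out, without a translate string
        +    explicit = TextSearch('needle', None, BOYERMOORE)
        +    # Unicode match strings use the TRIVIAL algorithm
        +    unicode_search = TextSearch(u'needle', None, TRIVIAL)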
      + +

      TextSearch Object Instance Variables

      + +
        +

        + To provide some help for reflection and pickling, the + TextSearch object gives (read-only) access to these + attributes. + +

        +

        + +
        + match
        + +
        + The string that the search object will look for in the + search text.

        + +
        + translate
        + +
        + The translate string used by the object or None (if no + translate string was passed to the + constructor).

        + +
        + algorithm
        + +
        + The algorithm used by the TextSearch object. For possible + values, see the TextSearch() constructor documentation. +

        + +
        + +
      + +

      TextSearch Object Instance Methods

      + +
        +

        + The TextSearch object has the following methods: + +

        +

        + +
        + search(text[,start=0,stop=len(text)])
        + +
        + Search for the substring match in text, looking only + at the slice [start:stop] and return + the slice (l,r) where the substring was + found, or (start,start) if it was not + found.

        + +

        + find(text[,start=0,stop=len(text)])
        + +
        + Search for the substring match in text, looking only + at the slice [start:stop] and return + the index where the substring was found, or + -1 if it was not found. This interface is + compatible with string.find.

        + +

        + findall(text,start=0,stop=len(text))
        + +
        + Same as search(), but return a list of + all non-overlapping slices (l,r) where + the match string can be found in text.

        + +

        + +

        + Note that translating the text before doing the search + often results in better performance. Use + string.translate() to do that efficiently. + +
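        + A minimal sketch of the three methods side by side (return values
        + assume the sample text shown):

        +    from simpleparse.stt.TextTools import TextSearch
        +
        +    so = TextSearch('needle')
        +    text = 'one needle, two needles'
        +    so.search(text)     # (4, 10)             -- slice of the first hit
        +    so.find(text)       # 4                   -- index, or -1 if not found
        +    so.findall(text)    # [(4, 10), (16, 22)] -- all non-overlapping slices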

      +
    + +
    + +

    CharSet Object

    + +
      + +

      + The CharSet object is an immutable object which can be used + for character set based string operations like text + matching, searching, splitting etc. + +

      + CharSet objects can be pickled and implement the copy + protocol as defined by the copy module as well as the + 'in'-protocol, so that c in charset works as + expected. Comparisons and hashing are not implemented (the + objects are stored by id in dictionaries). + +

      + The objects support both 8-bit strings and UCS-2 Unicode in + both the character set definition and the various methods. + Mixing of the supported types is also allowed. Memory + buffers are currently not supported. + +

      CharSet Object Constructor

      + +
        + +

        +

        +
        + CharSet(definition) +
        + +
        + Create a CharSet object for the given character set + definition. +

        + definition may be an 8-bit string or + Unicode. +

        + The constructor supports the re-module syntax for + defining character sets: "a-e" maps to "abcde" (the + backslash can be used to escape the special meaning + of "-", e.g. r"a\-e" maps to "a-e") and "^a-e" maps + to the set containing all but the characters + "abcde". +

        + Note that the special meaning of "^" only applies if + it appears as first character in a CharSet + definition. If you want to create a CharSet with the + single character "^", then you'll have to use the + escaped form: r"\^". The non-escape form "^" would + result in a CharSet matching all characters. +

        + To add the backslash character to a CharSet you have + to escape it with itself: r"\\". +

        + Watch out for the Python quoting semantics in these + explanations: the small r in front of some of these + strings marks them as raw Python string literals, which + means that no interpretation of backslashes is + applied: r"\\" == "\\\\" and r"a\-e" == "a\\-e". + +

        + +

        +
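        + A minimal sketch of the definition syntax (assuming CharSet is
        + exported by simpleparse.stt.TextTools):

        +    from simpleparse.stt.TextTools import CharSet
        +
        +    lower      = CharSet('a-z')        # re-style range
        +    no_digits  = CharSet('^0-9')       # leading '^' negates the set
        +    dash_chars = CharSet(r'a\-e')      # only 'a', '-' and 'e'
        +    'q' in lower                       # 'in'-protocol, true here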
      + +

      CharSet Object Instance Variables

      + +
        +

        + To provide some help for reflection and pickling, the + CharSet object gives (read-only) access to these + attributes. + +

        +

        + +
        + definition
        + +
        + The definition string which was passed to the + constructor.

        + +
        + +
      + +

      CharSet Object Instance Methods

      + +
        +

        + The CharSet object has these methods: + +

        +

        + +
        + contains(char) +
        + +
        + Return 1 if char is included in the character set, 0 + otherwise. + +

        + +

        + search(text[, direction=1, start=0, stop=len(text)]) +
        + +
        + Search text[start:stop] for the first + character included in the character set. Returns the + index position of the found character, or + None if no such character is found. +

        + direction defines the search direction: + a positive value searches forward starting from + text[start], while a negative value + searches backwards from text[stop-1]. + +

        + +

        + match(text[, direction=1, start=0, stop=len(text)]) +
        + +
        + Look for the longest match of characters in + text[start:stop] which appear in the + character set. Returns the length of this match as + integer. +

        + direction defines the match direction: + a positive value searches forward starting from + text[start] giving a prefix match, + while a negative value searches backwards from + text[stop-1] giving a suffix match. + +

        + +

        + split(text[, start=0, stop=len(text)])
        + +
        + Split text[start:stop] into a list of + substrings using the character set definition, + omitting the splitting parts and empty substrings. + +

        + +

        + splitx(text[, start=0, stop=len(text)])
        + +
        + Split text[start:stop] into a list of + substrings using the character set definition, such + that every second entry consists only of characters in + the set. + +

        + +

        + strip(text[, where=0, start=0, stop=len(text)])
        + +
        + Strip all characters in text[start:stop] + appearing in the character set. +

        + where indicates where to strip (<0: + left; =0: left and right; >0: right). +

        + +
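        + A minimal sketch of these methods, using a whitespace CharSet
        + (return values assume the sample text shown):

        +    from simpleparse.stt.TextTools import CharSet
        +
        +    ws = CharSet(' \t\r\n')
        +    text = '  hello   world  '
        +    ws.contains(' ')        # 1
        +    ws.match(text)          # 2  -- length of the leading run of set characters
        +    ws.search(text, 1, 2)   # 7  -- first set character at or after index 2
        +    ws.split(text)          # ['hello', 'world']
        +    ws.strip(text)          # 'hello   world'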

        + +
      +
    + +
    + +

    Functions

    + +
    + + + +

    Constants

    + +
      + +

      + The package exports these constants. They are defined in + Constants/Sets. + +

      + Note that Unicode defines many more characters in the + following categories. The character sets defined here are + restricted to ASCII (and parts of Latin-1) only. + +

      +

        +
        + +
        + a2z
        + +
        + 'abcdefghijklmnopqrstuvwxyz'

        + +

        + A2Z
        + +
        + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'

        + +


        + umlaute
        + +
        + 'äöüß'

        + +

        + Umlaute
        + +
        + 'ÄÖÜ'

        + +

        + alpha
        + +
        + A2Z + a2z

        + +


        + german_alpha
        + +
        + A2Z + a2z + umlaute + Umlaute

        + +

        + number
        + +
        + '0123456789'

        + +

        + alphanumeric
        + +
        + alpha + number

        + +

        + white
        + +
        + ' \t\v'

        + +

        + newline
        + +
        + '\n\r'

        + +

        + formfeed
        + +
        + '\f'

        + +

        + whitespace
        + +
        + white + newline + formfeed

        + +

        + any
        + +
        + All characters from \000-\377

        + +

        + *_charset
        + +
        + All of the above as CharSet() objects.

        + +

        + *_set
        + +
        + All of the above as set() compatible character + sets.

        + +

        + tagtable_cache
        + +
        + This is the cache dictionary which is used by the + TagTable() compiler to store compiled Tag Table + definitions. It has a hard limit of 100 entries, but + can also be managed by user routines to lower this + limit.

        + +

        + BOYERMOORE, FASTSEARCH, TRIVIAL
        + +
        + TextSearch() algorithm values. +

        + +

        +
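      + A minimal sketch of using one of the precompiled sets with the
      + Tagging Engine (assuming the names are exported by
      + simpleparse.stt.TextTools, as in the example below):

      +    from simpleparse.stt.TextTools import tag, AllInSet, alpha_set
      +
      +    # grab the leading run of letters using the precompiled alpha set
      +    table = (('word', AllInSet, alpha_set),)
      +    tag('Hello world', table)   # -> (1, [('word', 0, 5, None)], 5)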
      + +
    + +
    + +

    Examples of Use

    + +
      + +

      + The Examples/ subdirectory of the package contains a + few examples of how tables can be written and used. Here is a + non-trivial example for parsing HTML (well, most of it): + +

      
      +    from simpleparse.stt.TextTools import *
      +
      +    error = '***syntax error'			# error tag obj
      +
      +    tagname_set = set(alpha+'-'+number)
      +    tagattrname_set = set(alpha+'-'+number)
      +    tagvalue_set = set('"\'> ',0)
      +    white_set = set(' \r\n\t')
      +
      +    tagattr = (
      +	   # name
      +	   ('name',AllInSet,tagattrname_set),
      +	   # with value ?
      +	   (None,Is,'=',MatchOk),
      +	   # skip junk
      +	   (None,AllInSet,white_set,+1),
      +	   # unquoted value
      +	   ('value',AllInSet,tagvalue_set,+1,MatchOk),
      +	   # double quoted value
      +	   (None,Is,'"',+5),
      +	     ('value',AllNotIn,'"',+1,+2),
      +	     ('value',Skip,0),
      +	     (None,Is,'"'),
      +	     (None,Jump,To,MatchOk),
      +	   # single quoted value
      +	   (None,Is,'\''),
      +	     ('value',AllNotIn,'\'',+1,+2),
      +	     ('value',Skip,0),
      +	     (None,Is,'\'')
      +	   )
      +
      +    valuetable = (
      +	# ignore whitespace + '='
      +	(None,AllInSet,set(' \r\n\t='),+1),
      +	# unquoted value
      +	('value',AllInSet,tagvalue_set,+1,MatchOk),
      +	# double quoted value
      +	(None,Is,'"',+5),
      +	 ('value',AllNotIn,'"',+1,+2),
      +	 ('value',Skip,0),
      +	 (None,Is,'"'),
      +	 (None,Jump,To,MatchOk),
      +	# single quoted value
      +	(None,Is,'\''),
      +	 ('value',AllNotIn,'\'',+1,+2),
      +	 ('value',Skip,0),
      +	 (None,Is,'\'')
      +	)
      +
      +    allattrs = (# look for attributes
      +	       (None,AllInSet,white_set,+4),
      +	        (None,Is,'>',+1,MatchOk),
      +	        ('tagattr',Table,tagattr),
      +	        (None,Jump,To,-3),
      +	       (None,Is,'>',+1,MatchOk),
      +	       # handle incorrect attributes
      +	       (error,AllNotIn,'> \r\n\t'),
      +	       (None,Jump,To,-6)
      +	       )
      +
      +    htmltag = ((None,Is,'<'),
      +	       # is this a closing tag ?
      +	       ('closetag',Is,'/',+1),
      +	       # a comment ?
      +	       ('comment',Is,'!',+8),
      +		(None,Word,'--',+4),
      +		('text',sWordStart,BMS('-->'),+1),
      +		(None,Skip,3),
      +		(None,Jump,To,MatchOk),
      +		# a SGML-Tag ?
      +		('other',AllNotIn,'>',+1),
      +		(None,Is,'>'),
      +		    (None,Jump,To,MatchOk),
      +		   # XMP-Tag ?
      +		   ('tagname',Word,'XMP',+5),
      +		    (None,Is,'>'),
      +		    ('text',WordStart,'</XMP>'),
      +		    (None,Skip,len('</XMP>')),
      +		    (None,Jump,To,MatchOk),
      +		   # get the tag name
      +		   ('tagname',AllInSet,tagname_set),
      +		   # look for attributes
      +		   (None,AllInSet,white_set,+4),
      +		    (None,Is,'>',+1,MatchOk),
      +		    ('tagattr',Table,tagattr),
      +		    (None,Jump,To,-3),
      +		   (None,Is,'>',+1,MatchOk),
      +		   # handle incorrect attributes
      +		   (error,AllNotIn,'> \n\r\t'),
      +		   (None,Jump,To,-6)
      +		  )
      +
      +    htmltable = (# HTML-Tag
      +		 ('htmltag',Table,htmltag,+1,+4),
      +		 # not HTML, but still using this syntax: error or inside XMP-tag !
      +		 (error,Is,'<',+3),
      +		  (error,AllNotIn,'>',+1),
      +		  (error,Is,'>'),
      +		 # normal text
      +		 ('text',AllNotIn,'<',+1),
      +		 # end of file
      +		 ('eof',EOF,Here,-5),
      +		)
      +      
      +	
      + +

      + I hope this doesn't scare you away :-) ... it's + fast as hell. + +
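      + To actually run the table above, you would hand it to the tag()
      + function; a minimal sketch (the file name is hypothetical):

      +    html = open('page.html').read()
      +    success, taglist, nextpos = tag(html, htmltable)
      +    for tagobj, left, right, subtags in taglist:
      +        print(tagobj, html[left:right][:40])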

    + +
    + +

    Package Structure

    + +
      + +
      +[TextTools]
      +       [Constants]
      +              Sets.py
      +              TagTables.py
      +       Doc/
      +       [Examples]
      +              HTML.py
      +              Loop.py
      +              Python.py
      +              RTF.py
      +              RegExp.py
      +              Tim.py
      +              Words.py
      +              altRTF.py
      +              pytag.py
      +       [mxTextTools]
      +              test.py
      +       TextTools.py
      +    
      + +

      + Entries enclosed in brackets are packages (i.e. they are + directories that include an __init__.py file). Ones with + slashes are just ordinary subdirectories that are not accessible + via import. + +

      + The package TextTools imports everything needed from the other + components. It is sometimes also handy to do a from + simpleparse.stt.TextTools.Constants.TagTables import *. + +

      + Examples/ contains a few demos of what the Tag Tables + can do. + +

      + +

    + +
    + +

    Optional Add-Ons for mxTextTools

    + +
      + +

      + Mike C. Fletcher is working on a Tag Table generator called SimpleParse. + It works as a parser-generating front end to the Tagging Engine + and converts an EBNF-style grammar into a Tag Table directly + usable with the tag() function. + +

      + Tony J. Ibbs has started to work on a meta-language + for mxTextTools. It aims at simplifying the task of writing + Tag Table tuples using a Python-style syntax. It also gets rid + of the annoying jump offset calculations. + +

      + Andrew Dalke has started work on a parser generator called Martel, built + upon mxTextTools, which takes a regular expression grammar for a + format and turns the resulting parse tree into a set of + callback events emulating the XML/SAX API. The results look very + promising! + +

    + + + +

    Support

    + +
    + + + +

    Copyright & License

    + +
    + + + +

    History & Future

    + +
    + +

    +


    +
    +

    + © 1997-2000, Copyright by Marc-André Lemburg; + All Rights Reserved. mailto: mal@lemburg.com +

    + © 2000-2001, Copyright by eGenix.com Software GmbH; + All Rights Reserved. mailto: info@egenix.com +

    + + + + diff -Nru simpleparse-2.1.0a1/simpleparse/stt/__init__.py simpleparse-2.2.0/simpleparse/stt/__init__.py --- simpleparse-2.1.0a1/simpleparse/stt/__init__.py 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/stt/__init__.py 2015-11-11 18:42:23.000000000 +0000 @@ -0,0 +1,22 @@ +"""SimpleParse' built-in version of the mxTextTools text-processing engine + + Copyright (c) 1998-2000, Marc-Andre Lemburg; mailto:mal@lemburg.com + Copyright (c) 2000-2003, eGenix.com Software GmbH; mailto:info@egenix.com + Copyright (c) 2003-2006, Mike Fletcher; mailto:mcfletch@vrplumber.com + + See the documentation for further information on copyrights, + or contact the author. All Rights Reserved. + + IMPORTANT: + The subpackages included in the mx Extension series may have + different license terms imposed on them. Be sure to read the + documentation of each subpackage *before* using them. + +""" +__copyright__ = """\ +Copyright (c) 1998-2000, Marc-Andre Lemburg; mailto:mal@lemburg.com +Copyright (c) 2000-2003, eGenix.com Software GmbH; mailto:info@egenix.com +Copyright (c) 2003-2006, Mike Fletcher; mailto:mcfletch@vrplumber.com + See the documentation for further information on copyrights, + or contact the author. All Rights Reserved. +""" \ No newline at end of file diff -Nru simpleparse-2.1.0a1/simpleparse/stt/LICENSE simpleparse-2.2.0/simpleparse/stt/LICENSE --- simpleparse-2.1.0a1/simpleparse/stt/LICENSE 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/stt/LICENSE 2012-10-17 20:54:31.000000000 +0000 @@ -0,0 +1,16 @@ +============================================================================= +eGenix.com mx Extension Series for Python +----------------------------------------------------------------------------- + +Please see the LICENSE file in the package subdirectories for information +on copyright and authorized use of the packages. + +Some packages are licensed as Open Source software, others are free for +educational use only. All packages come with full source code. + +----------------------------------------------------------------------------- +Marc-Andre Lemburg, mal@egenix.com +CEO +eGenix.com GmbH +Langenfeld +Germany diff -Nru simpleparse-2.1.0a1/simpleparse/stt/mxLicense.html simpleparse-2.2.0/simpleparse/stt/mxLicense.html --- simpleparse-2.1.0a1/simpleparse/stt/mxLicense.html 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/stt/mxLicense.html 2012-10-17 20:54:31.000000000 +0000 @@ -0,0 +1,691 @@ + + + + mx Extension Series - License Information + + + + + +
    +

    mx Extension Series - License Information

    + +
    + + + + +
    + + Public License : + Commercial License : + Home + + + + Version 1.0.0 + +
    +
    + +

    Introduction

    + +
      + +

      + The mx Extensions Series packages are brought to you by the + eGenix.com Software, Skills and Services GmbH, Langenfeld, + Germany. We are licensing our products under the following + two different licenses: +

      + +

      + The Public License is very similar to the Python 2.0 + license and covers the open source software made available + by eGenix.com which is free of charge even for commercial + use. + +

      + The Commercial License is intended for covering + commercial eGenix.com software, notably the mxODBC + package. Only private and non-commercial use is free of + charge. + +

      + If you have questions regarding these licenses, please + contact Licenses@eGenix.com. + If you would like to bundle the software with your + commercial product, please write to Sales@eGenix.com + for more information about the redistribution conditions and + terms. + +

    + + + +

    eGenix.com Public License

    + +
      + +

      + The eGenix.com Public License is similar to the Python 2.0 license + and is considered an Open Source license (in the sense defined + by the Open Source + Initiative (OSI)) by eGenix.com. + +

      + The license should also be compatible with the GNU Public License, in case + that matters. The only part which is known to have caused + some problems with Richard Stallman in the past is the + choice of law clause. + +

      + + + + + +
      + +

      EGENIX.COM PUBLIC LICENSE AGREEMENT VERSION 1.0.0

      +

      +

      1. Introduction

      +

      +This "License Agreement" is between eGenix.com Software, Skills and +Services GmbH ("eGenix.com"), having an office at Pastor-Loeh-Str. 48, +D-40764 Langenfeld, Germany, and the Individual or Organization +("Licensee") accessing and otherwise using this software in source or +binary form and its associated documentation ("the Software"). +

      +

      2. License

      +

      +Subject to the terms and conditions of this eGenix.com Public License +Agreement, eGenix.com hereby grants Licensee a non-exclusive, +royalty-free, world-wide license to reproduce, analyze, test, perform +and/or display publicly, prepare derivative works, distribute, and +otherwise use the Software alone or in any derivative version, +provided, however, that the eGenix.com Public License Agreement is +retained in the Software, or in any derivative version of the Software +prepared by Licensee. +

      +

      3. NO WARRANTY

      +

      +eGenix.com is making the Software available to Licensee on an "AS IS" +basis. SUBJECT TO ANY STATUTORY WARRANTIES WHICH CAN NOT BE EXCLUDED, +EGENIX.COM MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. +BY WAY OF EXAMPLE, BUT NOT LIMITATION, EGENIX.COM MAKES NO AND +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE WILL NOT +INFRINGE ANY THIRD PARTY RIGHTS. +

      +

      4. LIMITATION OF LIABILITY

      +

      +EGENIX.COM SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE +SOFTWARE FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS +(INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, +BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER +PECUNIARY LOSS) AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THE +SOFTWARE, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE +POSSIBILITY THEREOF. +

      +SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION OR LIMITATION OF +INCIDENTAL OR CONSEQUENTIAL DAMAGES, SO THE ABOVE EXCLUSION OR +LIMITATION MAY NOT APPLY TO LICENSEE. +

      +

      5. Termination

      +

      +This License Agreement will automatically terminate upon a material +breach of its terms and conditions. +

      +

      6. General

      +

      +Nothing in this License Agreement affects any statutory rights of +consumers that cannot be waived or limited by contract. +

      +Nothing in this License Agreement shall be deemed to create any +relationship of agency, partnership, or joint venture between +eGenix.com and Licensee. +

      +If any provision of this License Agreement shall be unlawful, void, or +for any reason unenforceable, such provision shall be modified to the +extent necessary to render it enforceable without losing its intent, +or, if no such modification is possible, be severed from this License +Agreement and shall not affect the validity and enforceability of the +remaining provisions of this License Agreement. +

      +This License Agreement shall be governed by and interpreted in all +respects by the law of Germany, excluding conflict of law +provisions. It shall not be governed by the United Nations Convention +on Contracts for International Sale of Goods. +

      +This License Agreement does not grant permission to use eGenix.com +trademarks or trade names in a trademark sense to endorse or promote +products or services of Licensee, or any third party. +

      +The controlling language of this License Agreement is English. If +Licensee has received a translation into another language, it has been +provided for Licensee's convenience only. +

      +

      7. Agreement

      +

      +By downloading, copying, installing or otherwise using the Software, +Licensee agrees to be bound by the terms and conditions of this +License Agreement. +

      + +

      + +

    + + + +

    eGenix.com Commercial License

    + +
      + +

      + The eGenix.com Commercial License covers commercial + eGenix.com software, notably the mxODBC + package. Only private and non-commercial use is free of + charge. Usage of the software in commercial settings, such as for implementing + in-house applications in/for companies or consulting work + where the software is used as a tool, requires a "Proof of + Authorization" which can be bought from eGenix.com. + +

      + + + + + +
      + +

      EGENIX.COM COMMERCIAL LICENSE AGREEMENT VERSION 1.0.0

      +

      +

      1. Introduction

      +

      +This "License Agreement" is between eGenix.com Software, Skills and +Services GmbH ("eGenix.com"), having an office at Pastor-Loeh-Str. 48, +D-40764 Langenfeld, Germany, and the Individual or Organization +("Licensee") accessing and otherwise using this software in source or +binary form and its associated documentation ("the Software"). +

      +

      2. Terms and Definitions

      +

      +The "Software" covered under this License Agreement includes without +limitation, all object code, source code, help files, publications, +documentation and other programs, products or tools that are included +in the official "Software Distribution" available from eGenix.com. +

      +The "Proof of Authorization" for the Software is a written and signed +notice from eGenix.com providing evidence of the extent of +authorizations the Licensee has acquired to use the Software and of +Licensee's eligibility for future upgrade program prices (if +announced) and potential special or promotional opportunities. As +such, the Proof of Authorization becomes part of this License Agreement. +

      +Installation of the Software ("Installation") refers to the process of +unpacking or copying the files included in the Software Distribution +to an Installation Target. +

      +"Installation Target" refers to the target of an installation +operation. Targets are defined as follows: +

      + 1) "CPU" refers to a central processing unit which is able to + store and/or execute the Software (a server, personal computer, or + other computer-like device) using at most two (2) processors, +

      + 2) "Site" refers to at most one hundred fifty (150) CPUs installed + at a single site of a company, +

      + 3) "Corporate" refers to at most one thousand (1000) CPUs + installed at an unlimited number of sites of the company, +

      + 4) "Developer CPU" refers to a single CPU used by at most one (1) + developer. +

      +When installing the Software on a server CPU for use by other CPUs in +a network, Licensee must obtain a License for the server CPU and for +all client CPUs attached to the network which will make use of the +Software by copying the Software in binary or source form from the +server into their CPU memory. If a CPU makes use of more than two (2) +processors, Licensee must obtain additional CPU licenses to cover the +total number of installed processors. Likewise, if a Developer CPU is +used by more than one developer, Licensee must obtain additional +Developer CPU licenses to cover the total number of developers using +the CPU. +

      +"Commercial Environment" refers to any application environment which +is aimed at producing profit. This includes, without limitation, +for-profit organizations, work as independent contractor, consultant +and other profit generating relationships with organizations or +individuals. +

      +"Non-Commercial Environments" are all those application environments +which do not directly or indirectly generate profit. Educational and +other officially acknowledged non-profit organizations are regarded as +being a Non-Commercial Environment in the above sense. +

      +

      3. License Grant

      +

      +Subject to the terms and conditions of this License Agreement, +eGenix.com hereby grants Licensee a non-exclusive, world-wide license +to +

      + 1) use the Software to the extent of authorizations Licensee has + acquired and +

      + 2) distribute, make and install copies to support the level of use + authorized, providing Licensee reproduces this License Agreement + and any other legends of ownership on each copy, or partial copy, + of the Software. +

      +If Licensee acquires this Software as a program upgrade, Licensee's +authorization to use the Software from which Licensee upgraded is +terminated. +

      +Licensee will ensure that anyone who uses the Software does so only in +compliance with the terms of this License Agreement. +

      +Licensee may not +

      + 1) use, copy, install, compile, modify, or distribute the Software + except as provided in this License Agreement; +

      + 2) reverse assemble, reverse engineer, reverse compile, or + otherwise translate the Software except as specifically permitted + by law without the possibility of contractual waiver; or +

      + 3) rent, sublicense or lease the Software. +

      +

      4. Authorizations

      +

      +The extent of authorization depends on the ownership of a Proof of +Authorization for the Software. +

      +Usage of the Software for any other purpose not explicitly covered by +this License Agreement or granted by the Proof of Authorization is not +permitted and requires the written prior permission from eGenix.com. +

      +

      4.1. Non-Commercial Environments

      +

      +This section applies to all uses of the Software without a Proof of +Authorization for the Software in a Non-Commercial Environment. +

      +Licensee may copy, install, compile, modify and use the Software under +the terms of this License Agreement FOR NON-COMMERCIAL PURPOSES ONLY. +

      +Use of the Software in a Commercial Environment or for any other +purpose, such as redistribution, IS NOT PERMITTED BY THIS LICENSE and +requires a Proof of Authorization from eGenix.com. +

      +

      4.2. Evaluation Period for Commercial Environments

      +

      +This section applies to all uses of the Software without a Proof of +Authorization for the Software in a Commercial Environment. +

      +Licensee may copy, install, compile, modify and use the Software under +the terms of this License Agreement FOR EVALUATION AND TESTING +PURPOSES and DURING A LIMITED EVALUATION PERIOD OF AT MOST THIRTY (30) +DAYS AFTER INITIAL INSTALLATION ONLY. +

      +For use of the Software after the evaluation period or for any other +purpose, such as redistribution, Licensee must obtain a Proof of +Authorization from eGenix.com. +

      +If Licensee decides not to obtain a Proof of Authorization after the +evaluation period, Licensee agrees to cease using and to remove all +installed copies of the Software. +

      +

      4.3. Usage under Proof of Authorization

      +

      +This section applies to all uses of the Software provided that +Licensee owns a Proof of Authorization for the Software. +

      +Licensee may copy, install, compile, modify, use and distribute the +Software to the extent of authorization acquired by the Proof of +Authorization and under the terms and conditions of this License +Agreement. +

      +

      5. Transfer of Rights and Obligations

      +

      +Licensee may transfer all license rights and obligations under a Proof +of Authorization for the Software to another party by transferring the +Proof of Authorization and a copy of this License Agreement and all +documentation. +

      +The transfer of Licensee's license rights and obligations terminates +Licensee's authorization to use the Software under the Proof of +Authorization. +

      +

      6. Modifications

      +

      +Software modifications may only be distributed in form of patches to +the original files contained in the Software Distribution. +

      +The patches must be accompanied by a legend of origin and ownership +and a visible message stating that the patches are not original +Software delivered by eGenix.com, nor that eGenix.com can be held +liable for possible damages related directly or indirectly to the +patches if they are applied to the Software. +

      +

      7. Experimental Code or Features

      +

      +The Software may include components containing experimental code or +features which may be modified substantially before becoming generally +available. +

      +These experimental components or features may not be at the level of +performance or compatibility of generally available eGenix.com +products. eGenix.com does not guarantee that any of the experimental +components or features contained in the eGenix.com Software will ever be made +generally available. +

      +

      8. Expiration and License Control Devices

      +

      +Components of the Software may contain disabling or license control +devices that will prevent them from being used after the expiration of +a period of time or on Installation Targets for which no license was +obtained. +

      +Licensee will not tamper with these disabling devices or the +components. Licensee will take precautions to avoid any loss of data +that might result when the components can no longer be used. +

      +

      9. NO WARRANTY

      +

      +eGenix.com is making the Software available to Licensee on an "AS IS" +basis. SUBJECT TO ANY STATUTORY WARRANTIES WHICH CAN NOT BE EXCLUDED, +EGENIX.COM MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, EGENIX.COM MAKES NO +AND DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR +FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE +WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. +

      +

      10. LIMITATION OF LIABILITY

      +

      +TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, IN NO EVENT SHALL +EGENIX.COM BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE SOFTWARE +FOR (I) ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS +(INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, +BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER +PECUNIARY LOSS) AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THE +SOFTWARE, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE +POSSIBILITY THEREOF; OR (II) ANY AMOUNTS IN EXCESS OF THE AGGREGATE +AMOUNTS PAID TO EGENIX.COM UNDER THIS LICENSE AGREEMENT DURING THE +TWELVE (12) MONTH PERIOD PRECEEDING THE DATE THE CAUSE OF ACTION +AROSE. +

      +SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION OR LIMITATION OF +INCIDENTAL OR CONSEQUENTIAL DAMAGES, SO THE ABOVE EXCLUSION OR +LIMITATION MAY NOT APPLY TO LICENSEE. +

      +

      11. Termination

      +

      +This License Agreement will automatically terminate upon a material +breach of its terms and conditions if not cured within thirty (30) +days of written notice by eGenix.com. Upon termination, Licensee shall +discontinue use and remove all installed copies of the Software. +

      +

      12. Indemnification

      +

      +Licensee hereby agrees to indemnify eGenix.com against and hold +harmless eGenix.com from any claims, lawsuits or other losses that +arise out of Licensee's breach of any provision of this License +Agreement. +

      +

      13. Third Party Rights

      +

      +Any software or documentation in source or binary form provided along +with the Software that is associated with a separate license agreement +is licensed to Licensee under the terms of that license +agreement. This License Agreement does not apply to those portions of +the Software. Copies of the third party licenses are included in the +Software Distribution. +

      +

      14. High Risk Activities

      +

      +The Software is not fault-tolerant and is not designed, manufactured +or intended for use or resale as on-line control equipment in +hazardous environments requiring fail-safe performance, such as in the +operation of nuclear facilities, aircraft navigation or communication +systems, air traffic control, direct life support machines, or weapons +systems, in which the failure of the Software, or any software, tool, +process, or service that was developed using the Software, could lead +directly to death, personal injury, or severe physical or +environmental damage ("High Risk Activities"). +

      +Accordingly, eGenix.com specifically disclaims any express or implied +warranty of fitness for High Risk Activities. +

      +Licensee agrees that eGenix.com will not be liable for any claims or +damages arising from the use of the Software, or any software, tool, +process, or service that was developed using the Software, in such +applications. +

      +

      15. General

      +

      +Nothing in this License Agreement affects any statutory rights of +consumers that cannot be waived or limited by contract. +

      +Nothing in this License Agreement shall be deemed to create any +relationship of agency, partnership, or joint venture between +eGenix.com and Licensee. +

      +If any provision of this License Agreement shall be unlawful, void, or +for any reason unenforceable, such provision shall be modified to the +extent necessary to render it enforceable without losing its intent, +or, if no such modification is possible, be severed from this License +Agreement and shall not affect the validity and enforceability of the +remaining provisions of this License Agreement. +

      +This License Agreement shall be governed by and interpreted in all +respects by the law of Germany, excluding conflict of law +provisions. It shall not be governed by the United Nations Convention +on Contracts for International Sale of Goods. +

      +This License Agreement does not grant permission to use eGenix.com +trademarks or trade names in a trademark sense to endorse or promote +products or services of Licensee, or any third party. +

      +The controlling language of this License Agreement is English. If +Licensee has received a translation into another language, it has been +provided for Licensee's convenience only. +

      +

      16. Agreement

      +

      +By downloading, copying, installing or otherwise using the Software, +Licensee agrees to be bound by the terms and conditions of this +License Agreement. +

      +

      +For questions regarding this license agreement, please write to: +

      +	  eGenix.com Software, Skills and Services GmbH
      +	  Pastor-Loeh-Str. 48
      +	  D-40764 Langenfeld
      +	  Germany
      +
      + +
      + +

      + The following two sections give examples of the "Proof of + Authorization" for a commercial-use license of a product under + this license. + +

      + When you buy such a license, you will receive a signed + "Proof of Authorization" by postal mail within a week or + two. We will also send you the Proof of Authorization Key + by e-mail to acknowledge acceptance of the payment. + +

      + + + + + +
      + +

      EGENIX.COM PROOF OF AUTHORIZATION (Example: CPU License)

      +

      +

      1. License Grant

      +

      +eGenix.com Software, Skills and Services GmbH ("eGenix.com"), having +an office at Pastor-Loeh-Str. 48, D-40764 Langenfeld, Germany, hereby +grants the Individual or Organization ("Licensee") a non-exclusive, +world-wide license to use the software listed below in source or +binary form and its associated documentation ("the Software") under +the terms and conditions of the eGenix.com Commercial License +Agreement Version 1.0.0 and to the extent authorized by this Proof of +Authorization. +

      +

      2. Covered Software

      +
      +   Software Name:		   mxODBC Python ODBC Interface
      +   Software Version:		   Version 2.0.0
      +   Software Distribution:	   mxODBC-2.0.0.zip
      +   Software Distribution MD5 Hash: xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
      +   Operating System:		   any compatible operating system
      +
      +

      3. Authorizations

      +

      +eGenix.com hereby authorizes Licensee to copy, install, compile, +modify and use the Software on the following Installation Targets. +

      +   Installation Targets:	   one (1) CPU
      +
      +Redistribution of the Software is not allowed under this Proof of +Authorization. +

      +

      4. Proof

      +

      +This Proof of Authorization was issued by +

      +

      +	      __________________________________
      +
      +
      +	      Langenfeld, ______________________
      +
      +              Proof of Authorization Key:
      +              xxxx-xxxx-xxxx-xxxx-xxxx-xxxx
      +
      +

      + +

      + +

      + The next section gives an example of a "Developer CPU + License", which allows you to redistribute software built + around the Software or integrating it. Please contact sales@eGenix.com for + questions about the redistribution conditions. + +

      + + + + + +
      + +

      EGENIX.COM PROOF OF AUTHORIZATION (Example: Developer License)

      +

      +

      1. License Grant

      +

      +eGenix.com Software, Skills and Services GmbH ("eGenix.com"), having +an office at Pastor-Loeh-Str. 48, D-40764 Langenfeld, Germany, hereby +grants the Individual or Organization ("Licensee") a non-exclusive, +world-wide license to use and distribute the software listed below in +source or binary form and its associated documentation ("the +Software") under the terms and conditions of the eGenix.com Commercial +License Agreement Version 1.0.0 and to the extent authorized by this +Proof of Authorization. +

      +

      2. Covered Software

      +
      +   Software Name:		   mxODBC Python ODBC Interface
      +   Software Version:		   Version 2.0.0
      +   Software Distribution:	   mxODBC-2.0.0.zip
      +   Software Distribution MD5 Hash: xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
      +   Operating System:		   any compatible operating system
      +
      +

      3. Authorizations

      +

      +

      3.1. Application Development

      +

      +eGenix.com hereby authorizes Licensee to copy, install, compile, +modify and use the Software on the following Developer Installation +Targets for the purpose of developing products using the Software as +integral part. +

      +   Developer Installation Targets: one (1) CPU
      +
      +

      3.2. Redistribution

      +

      +eGenix.com hereby authorizes Licensee to redistribute the Software +bundled with products developed by Licensee on the Developer +Installation Targets ("the Product") subject to the terms and +conditions of the eGenix.com Commercial License Agreement for +installation and use in combination with the Product on the following +Redistribution Installation Targets, provided that: +

      + 1) Licensee shall not and shall not permit or assist any third + party to sell or distribute the Software as a separate product; +

      + 2) Licensee shall not and shall not permit any third party to +

      + (i) market, sell or distribute the Software to any end user + except subject to the eGenix Commercial License Agreement, +

      + (ii) rent, sell, lease or otherwise transfer the Software or + any part thereof or use it for the benefit of any third party, +

      + (iii) use the Software outside the Product or for any other + purpose not expressly licensed hereunder; +

      + 3) the Product does not provide functions or capabilities similar + to those of the Software itself, i.e. the Product does not + introduce commercial competition for the Software as sold by + eGenix.com. +

      +

      +   Redistribution Installation Targets:	any number of CPUs capable of
      +					running the Product and the
      +					Software
      +
      +

      4. Proof

      +

      +This Proof of Authorization was issued by +

      +

      +	      __________________________________
      +
      +
      +	      Langenfeld, ______________________
      +
      +              Proof of Authorization Key:
      +              xxxx-xxxx-xxxx-xxxx-xxxx-xxxx
      +
      +

      + +

      + +

    + +
    +
    © 2000, Copyright by eGenix.com + Software GmbH, Langenfeld, Germany; All Rights Reserved. mailto: + info@egenix.com +
    + + + diff -Nru simpleparse-2.1.0a1/simpleparse/stt/TextTools/Constants/Sets.py simpleparse-2.2.0/simpleparse/stt/TextTools/Constants/Sets.py --- simpleparse-2.1.0a1/simpleparse/stt/TextTools/Constants/Sets.py 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/stt/TextTools/Constants/Sets.py 2015-11-11 18:42:23.000000000 +0000 @@ -0,0 +1,58 @@ +# -*- coding: latin-1 -*- + +""" Constants for sets (of characters) + + Copyright (c) 2000, Marc-Andre Lemburg; mailto:mal@lemburg.com + Copyright (c) 2000-2003, eGenix.com Software GmbH; mailto:info@egenix.com + See the documentation for further information on copyrights, + or contact the author. All Rights Reserved. +""" +import string +from simpleparse.stt.TextTools.mxTextTools import CharSet + +# Simple character strings +a2z = 'abcdefghijklmnopqrstuvwxyz' +A2Z = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' +umlaute = 'äöüß' +Umlaute = 'ÄÖÜ' +alpha = A2Z + a2z +german_alpha = A2Z + a2z + umlaute + Umlaute +number = '0123456789' +alphanumeric = alpha + number +white = ' \t\v' +newline = '\r\n' +formfeed = '\f' +whitespace = white + newline + formfeed +any = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377' + +# Precompiled as sets, e.g. 
a2z_set = set(a2z) +a2z_set = '\000\000\000\000\000\000\000\000\000\000\000\000\376\377\377\007\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000' +A2Z_set = '\000\000\000\000\000\000\000\000\376\377\377\007\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000' +alpha_set = '\000\000\000\000\000\000\000\000\376\377\377\007\376\377\377\007\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000' +german_alpha_set = '\000\000\000\000\000\000\000\000\376\377\377\007\376\377\377\007\000\000\000\000\000\000\000\000\020\000@\220\020\000@\020' +number_set = '\000\000\000\000\000\000\377\003\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000' +alphanumeric_set = '\000\000\000\000\000\000\377\003\376\377\377\007\376\377\377\007\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000' +white_set = '\000\002\000\000\001\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000' +newline_set = '\000$\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000' +whitespace_set = '\000&\000\000\001\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000' +nonwhitespace_set = '\377\301\377\377\376\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377' +any_set = '\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377' + +# Compiled as CharSet instances +a2z_charset = CharSet('a-z') +A2Z_charset = CharSet('A-Z') +umlaute_charset = CharSet('äöüß') +Umlaute_charset = CharSet('ÄÖÜ') +alpha_charset = CharSet(A2Z + a2z) +german_alpha_charset = CharSet(A2Z + a2z + umlaute + Umlaute) +number_charset = CharSet('0-9') +alphanumeric_charset = CharSet(alpha + number) +white_charset = CharSet(' \t\v') +newline_charset = CharSet('\r\n') +formfeed_charset = CharSet('\f') +whitespace_charset = CharSet(white + newline + formfeed) +nonwhitespace_charset = CharSet('^' + white + newline + formfeed) +any_charset = CharSet('\000-\377') + +# Clean up +del CharSet, string \ No newline at end of file diff -Nru simpleparse-2.1.0a1/simpleparse/stt/TextTools/Constants/TagTables.py simpleparse-2.2.0/simpleparse/stt/TextTools/Constants/TagTables.py --- simpleparse-2.1.0a1/simpleparse/stt/TextTools/Constants/TagTables.py 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/stt/TextTools/Constants/TagTables.py 2015-11-11 18:42:23.000000000 +0000 @@ -0,0 +1,32 @@ +""" Constants for writing tag tables + + These are defined in mxte.h and imported here via the C extension. + See the documentation for details about the various constants. + + Copyright (c) 2000, Marc-Andre Lemburg; mailto:mal@lemburg.com + Copyright (c) 2000-2003, eGenix.com Software GmbH; mailto:info@egenix.com + See the documentation for further information on copyrights, + or contact the author. All Rights Reserved. + +""" +### Module init. 
+ +def _module_init(): + + from simpleparse.stt.TextTools.mxTextTools import mxTextTools + global id2cmd + + id2cmd = {} + + # Fetch symbols from the C extension and add them to this module + ns = globals() + for name, value in list(vars(mxTextTools).items()): + if name[:7] == '_const_': + cmd = name[7:] + ns[cmd] = value + if value == 0: + id2cmd[0] = 'Fail/Jump' + else: + id2cmd[value] = cmd + +_module_init() \ No newline at end of file diff -Nru simpleparse-2.1.0a1/simpleparse/stt/TextTools/COPYRIGHT simpleparse-2.2.0/simpleparse/stt/TextTools/COPYRIGHT --- simpleparse-2.1.0a1/simpleparse/stt/TextTools/COPYRIGHT 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/stt/TextTools/COPYRIGHT 2012-10-17 20:54:31.000000000 +0000 @@ -0,0 +1,19 @@ + --------------------------------------------------------------------- + COPYRIGHT NOTICE + --------------------------------------------------------------------- + Copyright (c) 1997-2000, Marc-Andre Lemburg; mailto:mal@lemburg.com + Copyright (c) 2000-2002, eGenix.com Software GmbH; mailto:info@egenix.com + Copyright (c) 2003-2006, Mike Fletcher; mailto:mcfletch@vrplumber.com + + All Rights Reserved. + + This software is protected by copyright and other intellectual + property laws and by international treaties. It may only be used + under the conditions and terms of the eGenix.com Public License + Agreement. + + You should have received a copy of the eGenix.com Public License + Agreement with this software (usually in the file LICENSE located + in the package's or software's main directory). Please write to + licenses@egenix.com to obtain a copy in case you should not have + received a copy. diff -Nru simpleparse-2.1.0a1/simpleparse/stt/TextTools/__init__.py simpleparse-2.2.0/simpleparse/stt/TextTools/__init__.py --- simpleparse-2.1.0a1/simpleparse/stt/TextTools/__init__.py 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/stt/TextTools/__init__.py 2015-11-11 18:42:23.000000000 +0000 @@ -0,0 +1,66 @@ +""" mxTextTools - A tools package for fast text processing. + + Copyright (c) 2000, Marc-Andre Lemburg; mailto:mal@lemburg.com + Copyright (c) 2000-2003, eGenix.com Software GmbH; mailto:info@egenix.com + Copyright (c) 2003-2006, Mike Fletcher; mailto:mcfletch@vrplumber.com + See the documentation for further information on copyrights, + or contact the author. All Rights Reserved. 
+""" +from .TextTools import * +from .TextTools import __version__ + +try: + import copyreg +except ImportError: + import copy_reg as copyreg + + +### Make the types pickleable: + +# Shortcuts for pickle (reduces the pickle's length) +def _CS(definition): + return CharSet(definition) +def _TT(definition): + return TagTable(definition) +def _TS(match,translate,algorithm): + return TextSearch(match,translate,algorithm) +# Needed for backward compatibility: +def _BMS(match,translate): + return BMS(match,translate) +def _FS(match,translate): + return FS(match,translate) + +# Module init +class modinit: + + ### Register the types + + def pickle_CharSet(cs): + return _CS,(cs.definition,) + def pickle_TagTable(tt): + return _TT,(tt.compiled(),) + def pickle_TextSearch(ts): + return _TS,(ts.match, ts.translate, ts.algorithm) + copyreg.pickle(CharSetType, + pickle_CharSet, + _CS) + copyreg.pickle(TagTableType, + pickle_TagTable, + _TT) + copyreg.pickle(TextSearchType, + pickle_TextSearch, + _TS) + if 0: + def pickle_BMS(so): + return _BMS,(so.match,so.translate) + def pickle_FS(so): + return _FS,(so.match,so.translate) + copyreg.pickle(BMSType, + pickle_BMS, + _BMS) + copyreg.pickle(FSType, + pickle_FS, + _FS) + + +del modinit diff -Nru simpleparse-2.1.0a1/simpleparse/stt/TextTools/LICENSE simpleparse-2.2.0/simpleparse/stt/TextTools/LICENSE --- simpleparse-2.1.0a1/simpleparse/stt/TextTools/LICENSE 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/stt/TextTools/LICENSE 2012-10-17 20:54:31.000000000 +0000 @@ -0,0 +1,106 @@ +________________________________________________________________________ + +EGENIX.COM PUBLIC LICENSE AGREEMENT VERSION 1.1.0 +________________________________________________________________________ + +1. Introduction + + This "License Agreement" is between eGenix.com Software, Skills + and Services GmbH ("eGenix.com"), having an office at + Pastor-Loeh-Str. 48, D-40764 Langenfeld, Germany, and the + Individual or Organization ("Licensee") accessing and otherwise + using this software in source or binary form and its associated + documentation ("the Software"). + +2. License + + Subject to the terms and conditions of this eGenix.com Public + License Agreement, eGenix.com hereby grants Licensee a + non-exclusive, royalty-free, world-wide license to reproduce, + analyze, test, perform and/or display publicly, prepare derivative + works, distribute, and otherwise use the Software alone or in any + derivative version, provided, however, that the eGenix.com Public + License Agreement is retained in the Software, or in any + derivative version of the Software prepared by Licensee. + +3. NO WARRANTY + + eGenix.com is making the Software available to Licensee on an "AS + IS" basis. SUBJECT TO ANY STATUTORY WARRANTIES WHICH CAN NOT BE + EXCLUDED, EGENIX.COM MAKES NO REPRESENTATIONS OR WARRANTIES, + EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, + EGENIX.COM MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY + OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT + THE USE OF THE SOFTWARE WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. + +4. 
LIMITATION OF LIABILITY + + EGENIX.COM SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF + THE SOFTWARE FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES + OR LOSS (INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF + BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS + INFORMATION, OR OTHER PECUNIARY LOSS) AS A RESULT OF USING, + MODIFYING OR DISTRIBUTING THE SOFTWARE, OR ANY DERIVATIVE THEREOF, + EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + + SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION OR LIMITATION OF + INCIDENTAL OR CONSEQUENTIAL DAMAGES, SO THE ABOVE EXCLUSION OR + LIMITATION MAY NOT APPLY TO LICENSEE. + +5. Termination + + This License Agreement will automatically terminate upon a + material breach of its terms and conditions. + +6. Third Party Rights + + Any software or documentation in source or binary form provided + along with the Software that is associated with a separate license + agreement is licensed to Licensee under the terms of that license + agreement. This License Agreement does not apply to those portions + of the Software. Copies of the third party licenses are included + in the Software Distribution. + +7. General + + Nothing in this License Agreement affects any statutory rights of + consumers that cannot be waived or limited by contract. + + Nothing in this License Agreement shall be deemed to create any + relationship of agency, partnership, or joint venture between + eGenix.com and Licensee. + + If any provision of this License Agreement shall be unlawful, + void, or for any reason unenforceable, such provision shall be + modified to the extent necessary to render it enforceable without + losing its intent, or, if no such modification is possible, be + severed from this License Agreement and shall not affect the + validity and enforceability of the remaining provisions of this + License Agreement. + + This License Agreement shall be governed by and interpreted in all + respects by the law of Germany, excluding conflict of law + provisions. It shall not be governed by the United Nations + Convention on Contracts for International Sale of Goods. + + This License Agreement does not grant permission to use eGenix.com + trademarks or trade names in a trademark sense to endorse or + promote products or services of Licensee, or any third party. + + The controlling language of this License Agreement is English. If + Licensee has received a translation into another language, it has + been provided for Licensee's convenience only. + +8. Agreement + + By downloading, copying, installing or otherwise using the + Software, Licensee agrees to be bound by the terms and conditions + of this License Agreement. + + + For question regarding this License Agreement, please write to: + + eGenix.com Software, Skills and Services GmbH + Pastor-Loeh-Str. 48 + D-40764 Langenfeld + Germany diff -Nru simpleparse-2.1.0a1/simpleparse/stt/TextTools/Makefile.pkg simpleparse-2.2.0/simpleparse/stt/TextTools/Makefile.pkg --- simpleparse-2.1.0a1/simpleparse/stt/TextTools/Makefile.pkg 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/stt/TextTools/Makefile.pkg 2012-10-17 20:54:31.000000000 +0000 @@ -0,0 +1,37 @@ +all: compile install + +# Compile target. This should setup the package and compile it +# into a state ready for installation. + +compile: + cd mxTextTools; \ + if ! test -f Makefile; then $(MAKE) -f Makefile.pre.in boot; fi; \ + $(MAKE) + +# Install target. This should do the install step. If the package +# needs no further installation step (i.e. 
the extension is not +# needed by other extension modules), then you can leave this target +# blank. + +install: + cd mxTextTools; \ + $(MAKE) install + +# Test target. Should do some testing of the extension. Writing +# something like 'ok' or 'works' and the end of a successful run. + +test: + cd mxTextTools; \ + python test.py + +# Documentation target. Use this to have the documentation for +# an extension generated at the user. + +doc: + # ??? + +# Cleanup before distributing the package +# +distclean: + cd mxTextTools; \ + $(MAKE) distclean diff -Nru simpleparse-2.1.0a1/simpleparse/stt/TextTools/mxTextTools/highcommands.h simpleparse-2.2.0/simpleparse/stt/TextTools/mxTextTools/highcommands.h --- simpleparse-2.1.0a1/simpleparse/stt/TextTools/mxTextTools/highcommands.h 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/stt/TextTools/mxTextTools/highcommands.h 2015-11-11 18:42:23.000000000 +0000 @@ -0,0 +1,229 @@ +/* non-recursive high-level commands + + The contract here is: + + The commands may alter any of the tag-specific variables + + errors may be indicated if encountered in childReturnCode and the error* variables + +*/ + + case MATCH_SWORDSTART: + case MATCH_SWORDEND: + case MATCH_SFINDWORD: + /* these items basically follow the low-level contract, with the + only exception being that MATCH_SFINDWORD will change childStart + */ + { + Py_ssize_t wordstart, wordend; + int returnCode; + + DPRINTF("\nsWordStart/End/sFindWord :\n" + " in string = '%.40s'\n",text+childPosition); + childStart = childPosition; + returnCode = TE_SEARCHAPI( + match, + text, + childStart, + sliceright, + &wordstart, + &wordend + ); + if (returnCode < 0) { + childReturnCode = ERROR_CODE; + errorType = PyExc_SystemError; + errorMessage = PyString_FromFormat( + "Search-object search returned value < 0 (%i): probable bug in text processing engine", + returnCode + ); + } else if (returnCode == 0) { + /* not matched */ + DPRINTF(" (no success)\n"); + childReturnCode = FAILURE_CODE; + } else { + /* matched, adjust childPosition according to the word start/end/find requirements */ + if (command == MATCH_SWORDSTART) { + childPosition = wordstart; + } else { + childPosition = wordend; + } + if (command == MATCH_SFINDWORD) { + /* XXX logic problem with lookahead + should it reset to real childStart or + the fake one created here? */ + childStart = wordstart; + } + DPRINTF(" [%i:%i] (matched and remembered this slice)\n", + childStart,childPosition); + } + break; + } + + case MATCH_LOOP: + /* No clue what this is supposed to do, real surprising if it works... + + */ + DPRINTF("\nLoop: pre loop counter = %i\n",loopcount); + + if (loopcount > 0) { + /* we are inside a loop */ + loopcount--; + } else if (loopcount < 0) { + /* starting a new loop */ + if (PyInt_Check(match)) { + loopcount = PyInt_AS_LONG(match); + loopstart = childPosition; + } else { + childReturnCode = ERROR_CODE; + errorType = PyExc_TypeError; + errorMessage = PyString_FromFormat( + "Tag Table entry %d: expected an integer (command=Loop) got a %.50s", + (unsigned int)index, + Py_TYPE(match)->tp_name + ); + } + } + if (childReturnCode == NULL_CODE ) { + + if (loopcount == 0) { + /* finished loop */ + loopcount = -1; + } + if (loopstart == childPosition) { + /* not matched */ + childReturnCode = FAILURE_CODE; + } else { + childReturnCode = SUCCESS_CODE; + /* on success, add match from start of the whole loop to end of current iteration? + + Would be really good if I had a clue what this is supposed to do :) . 
+ */ + childStart = loopstart; + } + DPRINTF("\nloop: post loop counter = %i\n",loopcount); + } + break; + + case MATCH_LOOPCONTROL: + + DPRINTF("\nLoopControl: loop counter = %i, " + "setting it to = %li\n", + loopcount,PyInt_AS_LONG(match)); + + loopcount = PyInt_AS_LONG(match); + break; + + case MATCH_CALL: + case MATCH_CALLARG: + /* call and callarg actually follow the low-level contract */ + + { + PyObject *fct = NULL; + int argc = -1; + + if (!PyTuple_Check(match)) { + argc = 0; + fct = match; + } else { + argc = PyTuple_GET_SIZE(match) - 1; + if (argc < 0) { + /* how is this even possible? */ + childReturnCode = ERROR_CODE; + errorType = PyExc_TypeError; + errorMessage = PyString_FromFormat( + "Tag Table entry %d: " + "expected a tuple (fct,arg0,arg1,...)" + "(command=CallArg)", + (unsigned int)index + ); + } else { + fct = PyTuple_GET_ITEM(match,0); + } + } + + if (childReturnCode == NULL_CODE && PyCallable_Check(fct)) { + PyObject *args; + register PyObject *w; + register Py_ssize_t argIndex; + + DPRINTF("\nCall[Arg] :\n"); + + childStart = childPosition; + + /* Build args = (textobj,childStart,sliceright[,arg0,arg1,...]) */ + args = PyTuple_New(3 + argc); + if (!args) { + childReturnCode = ERROR_CODE; + errorType = PyExc_SystemError; + errorMessage = PyString_FromFormat( + "Unable to create argument tuple for CallArgs command at index %d", + (unsigned int)index + ); + } else { + Py_INCREF(textobj); + PyTuple_SET_ITEM(args,0,textobj); + w = PyInt_FromLong(childStart); + if (!w){ + childReturnCode = ERROR_CODE; + errorType = PyExc_SystemError; + errorMessage = PyString_FromFormat( + "Unable to convert an integer %d to a Python Integer", + (unsigned int)childStart + ); + } else { + PyTuple_SET_ITEM(args,1,w); + w = PyInt_FromLong(sliceright); + if (!w) { + childReturnCode = ERROR_CODE; + errorType = PyExc_SystemError; + errorMessage = PyString_FromFormat( + "Unable to convert an integer %d to a Python Integer", + (unsigned int)sliceright + ); + } else { + PyTuple_SET_ITEM(args,2,w); + for (argIndex = 0; argIndex < argc; argIndex++) { + w = PyTuple_GET_ITEM(match,argIndex + 1); + Py_INCREF(w); + PyTuple_SET_ITEM(args,3 + argIndex,w); + } + /* now actually call the object */ + w = PyEval_CallObject(fct,args); + Py_DECREF(args); + if (w == NULL) { + childReturnCode = ERROR_CODE; + /* child's error should be allowed to propagate */ + } else if (!PyInt_Check(w)) { + childReturnCode = ERROR_CODE; + errorType = PyExc_TypeError; + errorMessage = PyString_FromFormat( + "Tag Table entry %d: matching function has to return an integer, returned a %.50s", + (unsigned int)index, + Py_TYPE(w)->tp_name + ); + } else { + childPosition = PyInt_AS_LONG(w); + Py_DECREF(w); + + if (childStart == childPosition) { + /* not matched */ + DPRINTF(" (no success)\n"); + childReturnCode = FAILURE_CODE; + } + } + } + } + } + } else { + childReturnCode = ERROR_CODE; + errorType = PyExc_TypeError; + errorMessage = PyString_FromFormat( + "Tag Table entry %d: " + "expected a callable object, got a %.50s" + "(command=Call[Arg])", + (unsigned int)index, + Py_TYPE(fct)->tp_name + ); + } + break; + } diff -Nru simpleparse-2.1.0a1/simpleparse/stt/TextTools/mxTextTools/__init__.py simpleparse-2.2.0/simpleparse/stt/TextTools/mxTextTools/__init__.py --- simpleparse-2.1.0a1/simpleparse/stt/TextTools/mxTextTools/__init__.py 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/stt/TextTools/mxTextTools/__init__.py 2015-11-11 18:42:23.000000000 +0000 @@ -0,0 +1,23 @@ +""" mxTextTools - A tools package for fast 
text processing. + + Copyright (c) 2000, Marc-Andre Lemburg; mailto:mal@lemburg.com + Copyright (c) 2000-2003, eGenix.com Software GmbH; mailto:info@egenix.com + Copyright (c) 2003-2006, Mike Fletcher; mailto:mcfletch@vrplumber.com + See the documentation for further information on copyrights, + or contact the author. All Rights Reserved. +""" +from simpleparse.stt.TextTools.mxTextTools.mxTextTools import * +from simpleparse.stt.TextTools.mxTextTools.mxTextTools import __version__ + +# To maintain backward compatibility: +BMS = TextSearch +BMSType = TextSearchType +try: + TextSearch('',None,FASTSEARCH) +except: + FS = BMS + FSType = BMS +else: + def FS(match, translate=None): + return TextSearch(match, translate, FASTSEARCH) + FSType = TextSearchType \ No newline at end of file diff -Nru simpleparse-2.1.0a1/simpleparse/stt/TextTools/mxTextTools/lowlevelcommands.h simpleparse-2.2.0/simpleparse/stt/TextTools/mxTextTools/lowlevelcommands.h --- simpleparse-2.1.0a1/simpleparse/stt/TextTools/mxTextTools/lowlevelcommands.h 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/stt/TextTools/mxTextTools/lowlevelcommands.h 2015-11-11 18:42:23.000000000 +0000 @@ -0,0 +1,325 @@ +/* Low-level matching commands code fragment + + The contract here is: + + all commands move forward through the buffer + + failure to move forward indicates failure of the tag + + moving forward indicates success of the tag + + errors may be indicated if encountered in childReturnCode and the error* variables + + only childPosition should be updated otherwise + +*/ +TE_CHAR *m = TE_STRING_AS_STRING(match); +if (m == NULL) { + childReturnCode = ERROR_CODE; + errorType = PyExc_TypeError; + errorMessage = PyString_FromFormat( + "Low-level command (%i) argument in entry %d couldn't be converted to a string object, is a %.50s", + command, + (unsigned int)index, + Py_TYPE(textobj)->tp_name + + ); +} else { + +switch (command) { + + case MATCH_ALLIN: + + { + register Py_ssize_t ml = TE_STRING_GET_SIZE(match); + register TE_CHAR *tx = &text[childPosition]; + + DPRINTF("\nAllIn :\n" + " looking for = '%.40s'\n" + " in string = '%.40s'\n",m,tx); + + if (ml > 1) { + for (; childPosition < sliceright; tx++, childPosition++) { + register Py_ssize_t j; + register TE_CHAR *mj = m; + register TE_CHAR ctx = *tx; + for (j=0; j < ml && ctx != *mj; mj++, j++) ; + if (j == ml) break; + } + } else if (ml == 1) { + /* one char only: use faster variant: */ + for (; childPosition < sliceright && *tx == *m; tx++, childPosition++) ; + } + break; + } + + case MATCH_ALLNOTIN: + + { + register Py_ssize_t ml = TE_STRING_GET_SIZE(match); + register TE_CHAR *tx = &text[childPosition]; + + DPRINTF("\nAllNotIn :\n" + " looking for = '%.40s'\n" + " not in string = '%.40s'\n",m,tx); + + if (ml != 1) { + for (; childPosition < sliceright; tx++, childPosition++) { + register Py_ssize_t j; + register TE_CHAR *mj = m; + register TE_CHAR ctx = *tx; + for (j=0; j < ml && ctx != *mj; mj++, j++) ; + if (j != ml) break; + } + } else { + /* one char only: use faster variant: */ + for (; childPosition < sliceright && *tx != *m; tx++, childPosition++) ; + } + break; + } + + case MATCH_IS: + + { + DPRINTF("\nIs :\n" + " looking for = '%.40s'\n" + " in string = '%.40s'\n",m,text+childPosition); + + if (childPosition < sliceright && *(&text[childPosition]) == *m) { + childPosition++; + } + break; + } + + case MATCH_ISIN: + + { + register Py_ssize_t ml = TE_STRING_GET_SIZE(match); + register TE_CHAR ctx = text[childPosition]; + + DPRINTF("\nIsIn :\n" + " looking 
for = '%.40s'\n" + " in string = '%.40s'\n",m,text+childPosition); + + if (ml > 0 && childPosition < sliceright) { + register Py_ssize_t j; + register TE_CHAR *mj = m; + for (j=0; j < ml && ctx != *mj; mj++, j++) ; + if (j != ml) childPosition++; + } + + break; + } + + case MATCH_ISNOTIN: + + { + register Py_ssize_t ml = TE_STRING_GET_SIZE(match); + register TE_CHAR ctx = text[childPosition]; + + DPRINTF("\nIsNotIn :\n" + " looking for = '%.40s'\n" + " not in string = '%.40s'\n",m,text+childPosition); + + if (ml > 0 && childPosition < sliceright) { + register Py_ssize_t j; + register TE_CHAR *mj = m; + for (j=0; j < ml && ctx != *mj; mj++, j++) ; + if (j == ml) childPosition++; + } + else + childPosition++; + + break; + } + + case MATCH_WORD: + + { + Py_ssize_t ml1 = TE_STRING_GET_SIZE(match) - 1; + register TE_CHAR *tx = &text[childPosition + ml1]; + register Py_ssize_t j = ml1; + register TE_CHAR *mj = &m[j]; + + DPRINTF("\nWord :\n" + " looking for = '%.40s'\n" + " in string = '%.40s'\n",m,&text[childPosition]); + + if (childPosition+ml1 >= sliceright) break; + + /* compare from right to left */ + for (; j >= 0 && *tx == *mj; + tx--, mj--, j--) ; + + if (j >= 0) /* not matched */ + childPosition = startPosition; /* reset */ + else + childPosition += ml1 + 1; + break; + } + + case MATCH_WORDSTART: + case MATCH_WORDEND: + + { + Py_ssize_t ml1 = TE_STRING_GET_SIZE(match) - 1; + + if (ml1 >= 0) { + register TE_CHAR *tx = &text[childPosition]; + + DPRINTF("\nWordStart/End :\n" + " looking for = '%.40s'\n" + " in string = '%.40s'\n",m,tx); + + /* Brute-force method; from right to left */ + for (;;) { + register Py_ssize_t j = ml1; + register TE_CHAR *mj = &m[j]; + + if (childPosition+j >= sliceright) { + /* reached eof: no match, rewind */ + childPosition = startPosition; + break; + } + + /* scan from right to left */ + for (tx += j; j >= 0 && *tx == *mj; + tx--, mj--, j--) ; + /* + DPRINTF("match text[%i+%i]: %c == %c\n", + childPosition,j,*tx,*mj); + */ + + if (j < 0) { + /* found */ + if (command == MATCH_WORDEND) childPosition += ml1 + 1; + break; + } + /* not found: rewind and advance one char */ + tx -= j - 1; + childPosition++; + } + } + + break; + } + +#if (TE_TABLETYPE == MXTAGTABLE_STRINGTYPE) + + /* Note: These two only work for 8-bit set strings. 
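Seen from Python, the low-level commands above (AllIn, Word, WordStart/WordEnd) and the search-object variants from highcommands.h (sWordStart, sWordEnd, sFindWord) are driven through tag() with three-element tag-table entries. The sketch below is illustrative only; it assumes the usual mxTextTools names are re-exported from simpleparse.stt.TextTools.TextTools as elsewhere in this release, and on Python 3 builds the text (and match arguments) may need to be bytes rather than str.

    from simpleparse.stt.TextTools.TextTools import (
        tag, AllIn, Word, sFindWord, TextSearch,
    )

    text = "   spam and eggs!"
    table = (
        ("blanks", AllIn, " \t"),                    # consume as many blanks as possible
        ("spam",   Word,  "spam"),                   # literal word at the current position
        ("eggs",   sFindWord, TextSearch("eggs")),   # search ahead, report only the hit
    )
    print(tag(text, table))
    # roughly: (1, [('blanks', 0, 3, None), ('spam', 3, 7, None),
    #               ('eggs', 12, 16, None)], 16)

Each matched entry contributes a (tagobj, left, right, subtags) tuple; sFindWord reports only the slice of the found word, which is what the childStart adjustment in the C code above implements.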
*/ + case MATCH_ALLINSET: + + { + register TE_CHAR *tx = &text[childPosition]; + unsigned char *m = (unsigned char *)PyString_AS_STRING(match); + + DPRINTF("\nAllInSet :\n" + " looking for = set at 0x%lx\n" + " in string = '%.40s'\n",(long)match,tx); + + for (; + childPosition < sliceright && + (m[((unsigned char)*tx) >> 3] & + (1 << (*tx & 7))) > 0; + tx++, childPosition++) ; + + break; + } + + case MATCH_ISINSET: + + { + register TE_CHAR *tx = &text[childPosition]; + unsigned char *m = (unsigned char *)PyString_AS_STRING(match); + + DPRINTF("\nIsInSet :\n" + " looking for = set at 0x%lx\n" + " in string = '%.40s'\n",(long)match,tx); + + if (childPosition < sliceright && + (m[((unsigned char)*tx) >> 3] & + (1 << (*tx & 7))) > 0) + childPosition++; + + break; + } + +#endif + + case MATCH_ALLINCHARSET: + + { + Py_ssize_t matching; + + DPRINTF("\nAllInCharSet :\n" + " looking for = CharSet at 0x%lx\n" + " in string = '%.40s'\n", + (long)match, &text[childPosition]); + + matching = mxCharSet_Match(match, + textobj, + childPosition, + sliceright, + 1); + if (matching < 0) { + childReturnCode = ERROR_CODE; + errorType = PyExc_SystemError; + errorMessage = PyString_FromFormat( + "Character set match returned value < 0 (%d): probable bug in text processing engine", + (unsigned int)matching + ); + } else { + childPosition += matching; + } + break; + } + + case MATCH_ISINCHARSET: + + { + int test; + + DPRINTF("\nIsInCharSet :\n" + " looking for = CharSet at 0x%lx\n" + " in string = '%.40s'\n", + (long)match, &text[childPosition]); + +#if (TE_TABLETYPE == MXTAGTABLE_STRINGTYPE) + test = mxCharSet_ContainsChar(match, text[childPosition]); +#else + test = mxCharSet_ContainsUnicodeChar(match, text[childPosition]); +#endif + if (test < 0) { + childReturnCode = ERROR_CODE; + errorType = PyExc_SystemError; + errorMessage = PyString_FromFormat( + "Character set match returned value < 0 (%i): probable bug in text processing engine", + test + ); + } else if (test) { + childPosition++; + } + break; + } + default: + { + childReturnCode = ERROR_CODE; + errorType = PyExc_ValueError; + errorMessage = PyString_FromFormat( + "Unrecognised Low-Level command code %i, maximum low-level code is %i", + command, + MATCH_MAX_LOWLEVEL + ); + } +/* end of the switch, this child is finished */ +} +} /* end of the wrapping if-check */ + +/* simple determination for these commands (hence calling them low-level) */ +if (childReturnCode == NULL_CODE) { + if (childPosition > childStart) { + childReturnCode = SUCCESS_CODE; + } else { + childReturnCode = FAILURE_CODE; + } +} diff -Nru simpleparse-2.1.0a1/simpleparse/stt/TextTools/mxTextTools/Makefile.pre.in simpleparse-2.2.0/simpleparse/stt/TextTools/mxTextTools/Makefile.pre.in --- simpleparse-2.1.0a1/simpleparse/stt/TextTools/mxTextTools/Makefile.pre.in 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/stt/TextTools/mxTextTools/Makefile.pre.in 2012-10-17 20:54:31.000000000 +0000 @@ -0,0 +1,305 @@ +# Universal Unix Makefile for Python extensions +# ============================================= + +# Short Instructions +# ------------------ + +# 1. Build and install Python (1.5 or newer). +# 2. "make -f Makefile.pre.in boot" +# 3. "make" +# You should now have a shared library. + +# Long Instructions +# ----------------- + +# Build *and install* the basic Python 1.5 distribution. See the +# Python README for instructions. (This version of Makefile.pre.in +# only withs with Python 1.5, alpha 3 or newer.) + +# Create a file Setup.in for your extension. 
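The AllInCharSet and IsInCharSet branches above delegate to mxCharSet_Match() and mxCharSet_ContainsChar()/mxCharSet_ContainsUnicodeChar(); from Python they are fed CharSet objects. A minimal sketch, assuming the CharSet constructor and the two command constants are exported alongside the other mxTextTools names:

    from simpleparse.stt.TextTools.TextTools import tag, CharSet, AllInCharSet, IsInCharSet

    ident_chars = CharSet("a-zA-Z_")            # CharSet objects accept range notation
    table = (
        ("first", IsInCharSet,  ident_chars),   # exactly one character from the set
        ("rest",  AllInCharSet, ident_chars),   # then as many more as possible
    )
    print(tag("spam42", table))
    # roughly: (1, [('first', 0, 1, None), ('rest', 1, 4, None)], 4)

Note the success rule spelled out at the end of the file: a low-level command succeeds only if it moved childPosition forward, so AllInCharSet against an empty run fails rather than matching zero characters.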
This file follows the +# format of the Modules/Setup.dist file; see the instructions there. +# For a simple module called "spam" on file "spammodule.c", it can +# contain a single line: +# spam spammodule.c +# You can build as many modules as you want in the same directory -- +# just have a separate line for each of them in the Setup.in file. + +# If you want to build your extension as a shared library, insert a +# line containing just the string +# *shared* +# at the top of your Setup.in file. + +# Note that the build process copies Setup.in to Setup, and then works +# with Setup. It doesn't overwrite Setup when Setup.in is changed, so +# while you're in the process of debugging your Setup.in file, you may +# want to edit Setup instead, and copy it back to Setup.in later. +# (All this is done so you can distribute your extension easily and +# someone else can select the modules they actually want to build by +# commenting out lines in the Setup file, without editing the +# original. Editing Setup is also used to specify nonstandard +# locations for include or library files.) + +# Copy this file (Misc/Makefile.pre.in) to the directory containing +# your extension. + +# Run "make -f Makefile.pre.in boot". This creates Makefile +# (producing Makefile.pre and sedscript as intermediate files) and +# config.c, incorporating the values for sys.prefix, sys.exec_prefix +# and sys.version from the installed Python binary. For this to work, +# the python binary must be on your path. If this fails, try +# make -f Makefile.pre.in Makefile VERSION=1.5 installdir= +# where is the prefix used to install Python for installdir +# (and possibly similar for exec_installdir=). + +# Note: "make boot" implies "make clobber" -- it assumes that when you +# bootstrap you may have changed platforms so it removes all previous +# output files. + +# If you are building your extension as a shared library (your +# Setup.in file starts with *shared*), run "make" or "make sharedmods" +# to build the shared library files. If you are building a statically +# linked Python binary (the only solution of your platform doesn't +# support shared libraries, and sometimes handy if you want to +# distribute or install the resulting Python binary), run "make +# python". + +# Note: Each time you edit Makefile.pre.in or Setup, you must run +# "make Makefile" before running "make". + +# Hint: if you want to use VPATH, you can start in an empty +# subdirectory and say (e.g.): +# make -f ../Makefile.pre.in boot srcdir=.. VPATH=.. + + +# === Bootstrap variables (edited through "make boot") === + +# The prefix used by "make inclinstall libainstall" of core python +installdir= /usr/local + +# The exec_prefix used by the same +exec_installdir=$(installdir) + +# Source directory and VPATH in case you want to use VPATH. +# (You will have to edit these two lines yourself -- there is no +# automatic support as the Makefile is not generated by +# config.status.) +srcdir= . +VPATH= . + +# === Variables that you may want to customize (rarely) === + +# (Static) build target +TARGET= python + +# Installed python binary (used only by boot target) +PYTHON= python + +# Add more -I and -D options here +CFLAGS= $(OPT) -I$(INCLUDEPY) -I$(EXECINCLUDEPY) $(DEFS) + +# These two variables can be set in Setup to merge extensions. +# See example[23]. 
+BASELIB= +BASESETUP= + +# === Variables set by makesetup === + +MODOBJS= _MODOBJS_ +MODLIBS= _MODLIBS_ + +# === Definitions added by makesetup === + +# === Variables from configure (through sedscript) === + +VERSION= @VERSION@ +CC= @CC@ +LINKCC= @LINKCC@ +SGI_ABI= @SGI_ABI@ +OPT= @OPT@ +LDFLAGS= @LDFLAGS@ +LDLAST= @LDLAST@ +DEFS= @DEFS@ +LIBS= @LIBS@ +LIBM= @LIBM@ +LIBC= @LIBC@ +RANLIB= @RANLIB@ +MACHDEP= @MACHDEP@ +SO= @SO@ +LDSHARED= @LDSHARED@ +CCSHARED= @CCSHARED@ +LINKFORSHARED= @LINKFORSHARED@ +#@SET_CCC@ + +# Install prefix for architecture-independent files +prefix= /usr/local + +# Install prefix for architecture-dependent files +exec_prefix= $(prefix) + +# Uncomment the following two lines for AIX +#LINKCC= $(LIBPL)/makexp_aix $(LIBPL)/python.exp "" $(LIBRARY); $(PURIFY) $(CC) +#LDSHARED= $(LIBPL)/ld_so_aix $(CC) -bI:$(LIBPL)/python.exp + +# === Fixed definitions === + +# Shell used by make (some versions default to the login shell, which is bad) +SHELL= /bin/sh + +# Expanded directories +BINDIR= $(exec_installdir)/bin +LIBDIR= $(exec_prefix)/lib +MANDIR= $(installdir)/man +INCLUDEDIR= $(installdir)/include +SCRIPTDIR= $(prefix)/lib + +# Detailed destination directories +BINLIBDEST= $(LIBDIR)/python$(VERSION) +LIBDEST= $(SCRIPTDIR)/python$(VERSION) +INCLUDEPY= $(INCLUDEDIR)/python$(VERSION) +EXECINCLUDEPY= $(exec_installdir)/include/python$(VERSION) +LIBP= $(exec_installdir)/lib/python$(VERSION) +DESTSHARED= $(BINLIBDEST)/site-packages + +LIBPL= $(LIBP)/config + +PYTHONLIBS= $(LIBPL)/libpython$(VERSION).a + +MAKESETUP= $(LIBPL)/makesetup +MAKEFILE= $(LIBPL)/Makefile +CONFIGC= $(LIBPL)/config.c +CONFIGCIN= $(LIBPL)/config.c.in +SETUP= $(LIBPL)/Setup.local $(LIBPL)/Setup + +SYSLIBS= $(LIBM) $(LIBC) + +ADDOBJS= $(LIBPL)/python.o config.o + +# Portable install script (configure doesn't always guess right) +INSTALL= $(LIBPL)/install-sh -c +# Shared libraries must be installed with executable mode on some systems; +# rather than figuring out exactly which, we always give them executable mode. +# Also, making them read-only seems to be a good idea... +INSTALL_SHARED= ${INSTALL} -m 555 + +# === Fixed rules === + +# Default target. This builds shared libraries only +default: sharedmods + +# Build everything +all: static sharedmods + +# Build shared libraries from our extension modules +sharedmods: $(SHAREDMODS) + +# Build a static Python binary containing our extension modules +static: $(TARGET) +$(TARGET): $(ADDOBJS) lib.a $(PYTHONLIBS) Makefile $(BASELIB) + $(LINKCC) $(LDFLAGS) $(LINKFORSHARED) \ + $(ADDOBJS) lib.a $(PYTHONLIBS) \ + $(LINKPATH) $(BASELIB) $(MODLIBS) $(LIBS) $(SYSLIBS) \ + -o $(TARGET) $(LDLAST) + +install: sharedmods + if test ! 
-d $(DESTSHARED) ; then \ + mkdir $(DESTSHARED) ; else true ; fi + -for i in X $(SHAREDMODS); do \ + if test $$i != X; \ + then $(INSTALL_SHARED) $$i $(DESTSHARED)/$$i; \ + fi; \ + done + +# Build the library containing our extension modules +lib.a: $(MODOBJS) + -rm -f lib.a + ar cr lib.a $(MODOBJS) + -$(RANLIB) lib.a + +# This runs makesetup *twice* to use the BASESETUP definition from Setup +config.c Makefile: Makefile.pre Setup $(BASESETUP) $(MAKESETUP) + $(MAKESETUP) \ + -m Makefile.pre -c $(CONFIGCIN) Setup -n $(BASESETUP) $(SETUP) + $(MAKE) -f Makefile do-it-again + +# Internal target to run makesetup for the second time +do-it-again: + $(MAKESETUP) \ + -m Makefile.pre -c $(CONFIGCIN) Setup -n $(BASESETUP) $(SETUP) + +# Make config.o from the config.c created by makesetup +config.o: config.c + $(CC) $(CFLAGS) -c config.c + +# Setup is copied from Setup.in *only* if it doesn't yet exist +Setup: + cp $(srcdir)/Setup.in Setup + +# Make the intermediate Makefile.pre from Makefile.pre.in +Makefile.pre: Makefile.pre.in sedscript + sed -f sedscript $(srcdir)/Makefile.pre.in >Makefile.pre + +# Shortcuts to make the sed arguments on one line +P=prefix +E=exec_prefix +H=Generated automatically from Makefile.pre.in by sedscript. +L=LINKFORSHARED + +# Make the sed script used to create Makefile.pre from Makefile.pre.in +sedscript: $(MAKEFILE) + sed -n \ + -e '1s/.*/1i\\/p' \ + -e '2s%.*%# $H%p' \ + -e '/^VERSION=/s/^VERSION=[ ]*\(.*\)/s%@VERSION[@]%\1%/p' \ + -e '/^CC=/s/^CC=[ ]*\(.*\)/s%@CC[@]%\1%/p' \ + -e '/^CCC=/s/^CCC=[ ]*\(.*\)/s%#@SET_CCC[@]%CCC=\1%/p' \ + -e '/^LINKCC=/s/^LINKCC=[ ]*\(.*\)/s%@LINKCC[@]%\1%/p' \ + -e '/^OPT=/s/^OPT=[ ]*\(.*\)/s%@OPT[@]%\1%/p' \ + -e '/^LDFLAGS=/s/^LDFLAGS=[ ]*\(.*\)/s%@LDFLAGS[@]%\1%/p' \ + -e '/^LDLAST=/s/^LDLAST=[ ]*\(.*\)/s%@LDLAST[@]%\1%/p' \ + -e '/^DEFS=/s/^DEFS=[ ]*\(.*\)/s%@DEFS[@]%\1%/p' \ + -e '/^LIBS=/s/^LIBS=[ ]*\(.*\)/s%@LIBS[@]%\1%/p' \ + -e '/^LIBM=/s/^LIBM=[ ]*\(.*\)/s%@LIBM[@]%\1%/p' \ + -e '/^LIBC=/s/^LIBC=[ ]*\(.*\)/s%@LIBC[@]%\1%/p' \ + -e '/^RANLIB=/s/^RANLIB=[ ]*\(.*\)/s%@RANLIB[@]%\1%/p' \ + -e '/^MACHDEP=/s/^MACHDEP=[ ]*\(.*\)/s%@MACHDEP[@]%\1%/p' \ + -e '/^SO=/s/^SO=[ ]*\(.*\)/s%@SO[@]%\1%/p' \ + -e '/^LDSHARED=/s/^LDSHARED=[ ]*\(.*\)/s%@LDSHARED[@]%\1%/p' \ + -e '/^CCSHARED=/s/^CCSHARED=[ ]*\(.*\)/s%@CCSHARED[@]%\1%/p' \ + -e '/^SGI_ABI=/s/^SGI_ABI=[ ]*\(.*\)/s%@SGI_ABI[@]%\1%/p' \ + -e '/^$L=/s/^$L=[ ]*\(.*\)/s%@$L[@]%\1%/p' \ + -e '/^$P=/s/^$P=\(.*\)/s%^$P=.*%$P=\1%/p' \ + -e '/^$E=/s/^$E=\(.*\)/s%^$E=.*%$E=\1%/p' \ + $(MAKEFILE) >sedscript + echo "/^#@SET_CCC@/d" >>sedscript + echo "/^installdir=/s%=.*%= $(installdir)%" >>sedscript + echo "/^exec_installdir=/s%=.*%=$(exec_installdir)%" >>sedscript + echo "/^srcdir=/s%=.*%= $(srcdir)%" >>sedscript + echo "/^VPATH=/s%=.*%= $(VPATH)%" >>sedscript + echo "/^LINKPATH=/s%=.*%= $(LINKPATH)%" >>sedscript + echo "/^BASELIB=/s%=.*%= $(BASELIB)%" >>sedscript + echo "/^BASESETUP=/s%=.*%= $(BASESETUP)%" >>sedscript + +# Bootstrap target +boot: clobber + VERSION=`$(PYTHON) -c "import sys; print sys.version[:3]"`; \ + installdir=`$(PYTHON) -c "import sys; print sys.prefix"`; \ + exec_installdir=`$(PYTHON) -c "import sys; print sys.exec_prefix"`; \ + $(MAKE) -f $(srcdir)/Makefile.pre.in VPATH=$(VPATH) srcdir=$(srcdir) \ + VERSION=$$VERSION \ + installdir=$$installdir \ + exec_installdir=$$exec_installdir \ + Makefile + +# Handy target to remove intermediate files and backups +clean: + -rm -f *.o *~ + +# Handy target to remove everything that is easily regenerated +clobber: clean + -rm -f *.a 
tags TAGS config.c Makefile.pre $(TARGET) sedscript + -rm -f *.so *.sl so_locations + + +# Handy target to remove everything you don't want to distribute +distclean: clobber + -rm -f Makefile Setup diff -Nru simpleparse-2.1.0a1/simpleparse/stt/TextTools/mxTextTools/mxbmse.c simpleparse-2.2.0/simpleparse/stt/TextTools/mxTextTools/mxbmse.c --- simpleparse-2.1.0a1/simpleparse/stt/TextTools/mxTextTools/mxbmse.c 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/stt/TextTools/mxTextTools/mxbmse.c 2012-10-17 20:54:31.000000000 +0000 @@ -0,0 +1,218 @@ +/* + mxbmse -- Fast Boyer Moore Search Algorithm (Version 0.9) + + The implementation is reentrant and thread safe. While the + general ideas behind the Boyer Moore algorithm are in the public + domain, this implementation falls under the following copyright: + + Copyright (c) 1997-2000, Marc-Andre Lemburg; mailto:mal@lemburg.com + Copyright (c) 2000-2002, eGenix.com Software GmbH; mailto:info@egenix.com + + All Rights Reserved + + See the documentation for copying information or contact the author + (mal@lemburg.com). +*/ + +/* to turn on the debugging printfs (DPRINTF):*/ +/* #define MAL_DEBUG */ + +/* Logging file used by debugging facility */ +#ifndef MAL_DEBUG_OUTPUTFILE +# define MAL_DEBUG_OUTPUTFILE "mxTextSearch.log" +#endif + +#ifdef MAL_DEBUG_WITH_PYTHON +# include "mx.h" +#endif + +#include "mxstdlib.h" +#include "mxbmse.h" + +/* --- Fast Boyer-Moore Implementation (8-bit) ---------------------------- */ + +mxbmse_data *bm_init(char *match, + int match_len) +{ + mxbmse_data *c; + int i; + BM_SHIFT_TYPE *shift; + char *m; + + c = newstruct(mxbmse_data); + c->match = match; + c->match_len = match_len; + c->eom = match + match_len - 1; + + /* Length 1 matching does not use a shift table */ + if (match_len == 1) + return c; + + /* Init shift table */ + for ( shift = c->shift, i = 256; i > 0; i--, shift++ ) + *shift = (BM_SHIFT_TYPE) match_len; + + DPRINTF("shift table for match='%s'\n",match); + for ( shift = c->shift, m = match, i = match_len - 1; + i >= 0; + i--, m++ ) { + shift[ (unsigned char) *m ] = (BM_SHIFT_TYPE) i; + DPRINTF(" char = '%c' shift = %i\n", *m, i); + } + + return c; +} + +void bm_free(mxbmse_data *c) +{ + if (c) + free(c); +} + +int bm_search(mxbmse_data *c, + char *text, + int start, + int text_len) +{ + register char *pt; + register char *eot = text + text_len; + + /* Error check */ + if (c == NULL) + return -1; + + /* Init text pointer */ + pt = text + start + c->match_len - 1; + + DPRINTF("Init : %2i %20.20s \t text: %2i %20.20s\n", + c->match_len,c->match,start,text+start); + + if (c->match_len > 1) + for (;;) { + register char *pm; + + pm = c->eom; + + for (;pt < eot && *pt != *pm; + pt += c->shift[(unsigned char) *pt]); + + if (pt >= eot) + break; + + /* First char matches.. what about the others ? */ + { + register int im = c->match_len; + + do { + DPRINTF("=match: %2i '%20.20s' \t text: '%20.20s'\n", + im,pm,pt); + if (--im == 0) + /* Match */ + return pt - text + c->match_len; + pt--; + pm--; + } while (*pt == *pm); + + /* Mismatch after match: use shift-table */ + { + register int a,b; + + a = c->shift[(unsigned char) *pt]; + b = c->match_len - im + 1; + DPRINTF("!match: %2i '%20.20s' \t text: '%20.20s' " + "(sh=%i)\n", + im,pm,pt,max(a,b)); + pt += (a > b) ? 
a : b; + } + } + + } + + /* Special case: matching string has length 1 */ + else { + register char m = *c->eom; + + for (;pt < eot; pt++) + if (*pt == m) + /* Match */ + return pt - text + 1; + } + + return start; /* no match */ +} + +/* bm search using the translate table -- 45% slower */ + +int bm_tr_search(mxbmse_data *c, + char *text, + int start, + int text_len, + char *tr) +{ + register char *pt; + register char *eot = text + text_len; + + /* Error check */ + if (c == NULL) + return -1; + + /* Init text pointer */ + pt = text + start + c->match_len - 1; + + DPRINTF("Init : %2i '%20.20s' \t text: %2i '%20.20s'\n", + c->match_len,c->match,start,text+start); + + if (c->match_len > 1) + for (;;) { + register char *pm; + + pm = c->eom; + + for (;pt < eot && tr[(unsigned char) *pt] != *pm; + pt += c->shift[(unsigned char) tr[(unsigned char) *pt]]); + + if (pt >= eot) + break; + + /* First char matches.. what about the others ? */ + { + register int im = c->match_len; + + do { + DPRINTF("=match: %2i '%20.20s' \t text: '%20.20s'\n", + im,pm,pt); + if (--im == 0) + /* Match */ + return pt - text + c->match_len; + pt--; + pm--; + } while (tr[(unsigned char) *pt] == *pm); + + /* Mismatch after match: use shift-table */ + { + register int a,b; + + a = c->shift[(unsigned char) tr[(unsigned char) *pt]]; + b = c->match_len - im + 1; + DPRINTF("!match: %2i '%20.20s' \t text: '%20.20s' " + "(sh=%i)\n", + im,pm,pt,max(a,b)); + pt += (a > b)?a:b; + } + } + + } + + /* Special case: matching string has length 1 */ + else { + register char m = *c->eom; + + for (;pt < eot; pt++) + if (*pt == m) + /* Match */ + return pt - text + 1; + } + + return start; /* no match */ +} + diff -Nru simpleparse-2.1.0a1/simpleparse/stt/TextTools/mxTextTools/mxbmse.h simpleparse-2.2.0/simpleparse/stt/TextTools/mxTextTools/mxbmse.h --- simpleparse-2.1.0a1/simpleparse/stt/TextTools/mxTextTools/mxbmse.h 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/stt/TextTools/mxTextTools/mxbmse.h 2012-10-17 20:54:31.000000000 +0000 @@ -0,0 +1,65 @@ +#ifndef MXBMSE_H +#define MXBMSE_H +/* + mxbmse -- Fast Boyer Moore Search Algorithm (Version 0.8) + + The implementation is reentrant and thread safe. While the + general idea behind the Boyer Moore algorithm are in the public + domain, this implementation falls under the following copyright: + + Copyright (c) 1997-2000, Marc-Andre Lemburg; mailto:mal@lemburg.com + Copyright (c) 2000-2002, eGenix.com Software GmbH; mailto:info@egenix.com + + All Rights Reserved + + See the documentation for copying information or contact the author + (mal@lemburg.com). 
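For readers new to the algorithm, bm_search() above is the classic bad-character-shift search: build a per-character shift table from the pattern, then skip ahead by the largest safe amount after each mismatch. The pure-Python sketch below shows the same idea in its simpler Horspool form; it is an illustration only, not a port of the exact max(a,b) shift logic used above, but it keeps bm_search()'s return convention (index just past the match, or start when nothing was found).

    def horspool_find(pattern, text, start=0):
        """Bad-character-shift search (Horspool variant)."""
        m = len(pattern)
        if m == 0 or start + m > len(text):
            return start
        # Characters not in the pattern allow a full-length skip; the last
        # pattern character is left out so a match attempt is never skipped.
        shift = {pattern[i]: m - 1 - i for i in range(m - 1)}
        pos = start
        while pos + m <= len(text):
            if text[pos:pos + m] == pattern:
                return pos + m          # index just past the match
            pos += shift.get(text[pos + m - 1], m)
        return start                    # no match

    assert horspool_find("eggs", "noise noise eggs") == 16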
+ +*/ + +#ifdef __cplusplus +extern "C" { +#endif + +/* --- Fast Boyer-Moore Implementation (8-bit) ---------------------------- */ + +/* sanity check switches */ +/*#define SAFER 1*/ + +/* SHIFT must have enough bits to store len(match) + - using 'char' here makes the routines run 15% slower than + with 'int', on the other hand, 'int' is at least 4 times + larger than 'char' +*/ +#ifndef BM_SHIFT_TYPE +# define BM_SHIFT_TYPE int +#endif + +typedef struct { + char *match; + int match_len; + char *eom; + char *pt; + BM_SHIFT_TYPE shift[256]; /* char-based shift table */ +} mxbmse_data; + +extern mxbmse_data *bm_init(char *match, + int match_len); +extern void bm_free(mxbmse_data *c); +extern int bm_search(mxbmse_data *c, + char *text, + int start, + int stop); +extern int bm_tr_search(mxbmse_data *c, + char *text, + int start, + int stop, + char *tr); + +#define BM_MATCH_LEN(bm) ((mxbmse_data *)bm)->match_len + +/* EOF */ +#ifdef __cplusplus +} +#endif +#endif diff -Nru simpleparse-2.1.0a1/simpleparse/stt/TextTools/mxTextTools/mx.h simpleparse-2.2.0/simpleparse/stt/TextTools/mxTextTools/mx.h --- simpleparse-2.1.0a1/simpleparse/stt/TextTools/mxTextTools/mx.h 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/stt/TextTools/mxTextTools/mx.h 2015-11-11 18:42:23.000000000 +0000 @@ -0,0 +1,490 @@ +#ifndef MX_H +#define MX_H + +/* + mx -- Marc's eXtension modules for Python: basic macros + + This file is only meant to be included by the extension modules. + DO NOT include it in the extension module's header file, since it + will definitely cause troubles then. + + To enable debugging ceratin things, define one of these before + including this file: + + MAL_REF_DEBUG -- debug reference counts (Py_MY_xxx) [this file] + MAL_DEBUG -- enable debug output (DPRINTF) [mxstdlib.h] + MAL_MEM_DEBUG -- enable malloc output (new,cnew,free,...) [mxstdlib.h] + + Copyright (c) 2000, Marc-Andre Lemburg; mailto:mal@lemburg.com + Copyright (c) 2000-2002, eGenix.com Software GmbH; mailto:info@egenix.com + See the documentation for further copyright information or contact + the author. + +*/ + +/* --- Platform or compiler specific tweaks ------------------------------- */ + +/* Add some platform specific symbols to enable work-arounds for the + static forward declaration of type definitions; note that the GNU C + compiler does not have this problem. + + Many thanks to all who have contributed to this list. + +*/ +#if (!defined(__GNUC__)) +# if (defined(NeXT) || defined(sgi) || defined(_AIX) || (defined(__osf__) && defined(__DECC)) || defined(TrueCompaq64) || defined(__VMS)) +# define BAD_STATIC_FORWARD +# endif +#endif + +/* Some more tweaks for various platforms. */ + +/* VMS needs this define. Thanks to Jean-François PIÉRONNE */ +#if defined(__VMS) +# define __SC__ +#endif + +/* xlC on AIX doesn't like the Python work-around for static forwards + in ANSI mode (default), so we switch on extended mode. 
Thanks to + Albert Chin-A-Young */ +#if defined(__xlC__) +# pragma langlvl extended +#endif + +/* --- Standard header files ---------------------------------------------- */ + +/* Include the generic mx header file */ +#include "mxh.h" + +/* Include nearly all Python symbols & definitions */ +#include "Python.h" + +/* Include other standard stuff */ +#include "mxstdlib.h" + +/* Include Python backward compatibility stuff */ +#include "mxpyapi.h" + +/* --- Compiler support --------------------------------------------------- */ + +/* Support for compilers which don't like static forward declarations + of arrays; Python 2.3 removed the support for BAD_STATIC_FORWARD + which is why we now use our own little helpers here. */ +#undef staticforward +#undef statichere +#ifdef BAD_STATIC_FORWARD +# define staticforward extern +# define statichere static +#else +# define staticforward static +# define statichere static +#endif + +/* --- Declare macros ----------------------------------------------------- */ + +#define Py_NONE (Py_INCREF(Py_None),Py_None) + +#ifdef MAL_REF_DEBUG +# define printref(x) printf("* refcount for "#x" = %i\n",(long) x->ob_refcnt); +#else +# define printref(x) +#endif + +/* --- Error handling ----------------------------------------------------- */ + +#define Py_Do(x) {if (!(x)) goto onError;} +#define Py_ReturnOnError(errortype,errorstr) {PyErr_SetString(errortype,errorstr);return NULL;} + +#define Py_Assert(x,errortype,errorstr) {if (!(x)) {PyErr_SetString(errortype,errorstr);goto onError;}} +#define Py_AssertWithArg(x,errortype,errorstr,a1) {if (!(x)) {PyErr_Format(errortype,errorstr,a1);goto onError;}} +#define Py_AssertWith2Args(x,errortype,errorstr,a1,a2) {if (!(x)) {PyErr_Format(errortype,errorstr,a1,a2);goto onError;}} +#define Py_AssertWith3Args(x,errortype,errorstr,a1,a2,a3) {if (!(x)) {PyErr_Format(errortype,errorstr,a1,a2,a3);goto onError;}} + +#define Py_Error(errortype,errorstr) {PyErr_SetString(errortype,errorstr);goto onError;} +#define Py_ErrorWithArg(errortype,errorstr,a1) {PyErr_Format(errortype,errorstr,a1);goto onError;} +#define Py_ErrorWith2Args(errortype,errorstr,a1,a2) {PyErr_Format(errortype,errorstr,a1,a2);goto onError;} +#define Py_ErrorWith3Args(errortype,errorstr,a1,a2,a3) {PyErr_Format(errortype,errorstr,a1,a2,a3);goto onError;} + +/* --- Reference counting ------------------------------------------------- */ + +#ifdef MAL_REF_DEBUG + +static void mx_Py_INCREF(PyObject *v, + char *name, + char *filename, + int lineno) +{ + if (!Py_DebugFlag) { + Py_XINCREF(v); + return; + } + if (!v) + mxDebugPrintf("[%s:%5i] Py_XINCREF( %-8s == NULL );\n", + filename,lineno,name); + else { + Py_INCREF(v);; + mxDebugPrintf("[%s:%5i] Py_XINCREF( %-8s at 0x%x [%s]); " + "new refcount = %i\n", + filename,lineno,name,(int)v,Py_TYPE(v)->tp_name, + v->ob_refcnt); + } +} + +static void mx_Py_DECREF(PyObject *v, + char *name, + char *filename, + int lineno) +{ + if (!Py_DebugFlag) { + Py_XDECREF(v); + return; + } + if (!v) + mxDebugPrintf("[%s:%5i] Py_XDECREF( %-8s == NULL );\n", + filename,lineno,name); + else { + int refcnt = v->ob_refcnt; + Py_DECREF(v); + if (refcnt <= 1) + mxDebugPrintf("[%s:%5i] Py_XDECREF( %-8s at 0x%x [%s]); " + "object deleted\n", + filename,lineno,name,(int)v,Py_TYPE(v)->tp_name); + else + mxDebugPrintf("[%s:%5i] Py_XDECREF( %-8s at 0x%x [%s]); " + "new refcount = %i\n", + filename,lineno,name,(int)v,Py_TYPE(v)->tp_name, + v->ob_refcnt); + } +} + +static void mx_Py_PRINT_REFCOUNT(PyObject *v, + char *name, + char *filename, + int lineno) +{ + 
if (!v) + mxDebugPrintf("[%s:%5i] Py_PRINT_REFCOUNT( %-8s == NULL );\n", + filename,lineno,name); + else { + mxDebugPrintf("[%s:%5i] Py_PRINT_REFCOUNT( %-8s at 0x%x [%s]) = %i;\n", + filename,lineno,name,(int)v,Py_TYPE(v)->tp_name, + v->ob_refcnt); + } +} + +# undef Py_INCREF +# define Py_INCREF(x) mx_Py_INCREF((PyObject *)x,#x,__FILE__,__LINE__) +# undef Py_DECREF +# define Py_DECREF(x) mx_Py_DECREF((PyObject *)x,#x,__FILE__,__LINE__) +# undef Py_XINCREF +# define Py_XINCREF(x) mx_Py_INCREF((PyObject *)x,#x,__FILE__,__LINE__) +# undef Py_XDECREF +# define Py_XDECREF(x) mx_Py_DECREF((PyObject *)x,#x,__FILE__,__LINE__) +# define Py_DELETE(x) {if (x->ob_refcnt > 1) mxDebugPrintf("[%s:%5i] Py_DELETE( "#x" ) WARNING: Refcount = %i > 1\n",__FILE__,__LINE__,(int)x->ob_refcnt);Py_DECREF(x);} +# define Py_PRINT_REFCOUNT(x) mx_Py_PRINT_REFCOUNT((PyObject *)x,#x,__FILE__,__LINE__) +#else +# define Py_DELETE(x) Py_DECREF(x) +# define Py_PRINT_REFCOUNT(x) +#endif + +#define Py_DEC_REF(x) {Py_XDECREF(x); x=0;} /* doing this once too often doesn't hurt */ + +/* Unreference a Python object. This is only used in Python debug + builds and needed to keep track of all allocated references. Use in + object constructors or free list implementations. */ +#ifndef _Py_DEC_REFTOTAL +# ifdef Py_REF_DEBUG +# define _Py_DEC_REFTOTAL _Py_RefTotal-- +# else +# define _Py_DEC_REFTOTAL +# endif +#endif +#define mxPy_UNREF(x) _Py_DEC_REFTOTAL + +/* --- Argument passing and checking -------------------------------------- */ + +/* No arguments expected; also use Py_MethodListEntryNoArgs() for this + kind of fct; this check is no longer needed in Python 2.3 and + later */ +#if PY_VERSION_HEX >= 0x02030000 +# define Py_NoArgsCheck() {if (0) goto onError;} +#else +# define Py_NoArgsCheck() {if (!PyArg_NoArgs(args)) goto onError;} +#endif + +/* For functions with old style args (Py_MethodListEntrySingleArg) */ +#define Py_GetArgObject(a) {a = args; if (!a) {PyErr_SetString(PyExc_TypeError,"function/method requires an argument"); goto onError;}} +#define Py_GetSingleArg(format,a1) {if (!PyArg_Parse(args,format,&a1)) goto onError;} + +/* For functions with new style args: */ +#define Py_GetArg(format,a1) {if (!PyArg_ParseTuple(args,format,&a1)) goto onError;} +#define Py_Get2Args(format,a1,a2) {if (!PyArg_ParseTuple(args,format,&a1,&a2)) goto onError;} +#define Py_Get3Args(format,a1,a2,a3) {if (!PyArg_ParseTuple(args,format,&a1,&a2,&a3)) goto onError;} +#define Py_Get4Args(format,a1,a2,a3,a4) {if (!PyArg_ParseTuple(args,format,&a1,&a2,&a3,&a4)) goto onError;} +#define Py_Get5Args(format,a1,a2,a3,a4,a5) {if (!PyArg_ParseTuple(args,format,&a1,&a2,&a3,&a4,&a5)) goto onError;} +#define Py_Get6Args(format,a1,a2,a3,a4,a5,a6) {if (!PyArg_ParseTuple(args,format,&a1,&a2,&a3,&a4,&a5,&a6)) goto onError;} +#define Py_Get7Args(format,a1,a2,a3,a4,a5,a6,a7) {if (!PyArg_ParseTuple(args,format,&a1,&a2,&a3,&a4,&a5,&a6,&a7)) goto onError;} +#define Py_Get8Args(format,a1,a2,a3,a4,a5,a6,a7,a8) {if (!PyArg_ParseTuple(args,format,&a1,&a2,&a3,&a4,&a5,&a6,&a7,&a8)) goto onError;} + +/* For functions with keywords -- the first macro parameter must be + the keywords array given as e.g. + + static char *keywords[] = {"first","second","third", 0}; + + with an entry for every argument (in the correct order). The + functions must be included in the method list using + Py_MethodWithKeywordsListEntry() and be declared as + Py_C_Function_WithKeywords(). 
+ +*/ +#define Py_KeywordGetArg(keywords,format,a1) {if (!PyArg_ParseTupleAndKeywords(args,kws,format,keywords,&a1)) goto onError;} +#define Py_KeywordGet2Args(keywords,format,a1,a2) {if (!PyArg_ParseTupleAndKeywords(args,kws,format,keywords,&a1,&a2)) goto onError;} +#define Py_KeywordGet3Args(keywords,format,a1,a2,a3) {if (!PyArg_ParseTupleAndKeywords(args,kws,format,keywords,&a1,&a2,&a3)) goto onError;} +#define Py_KeywordGet4Args(keywords,format,a1,a2,a3,a4) {if (!PyArg_ParseTupleAndKeywords(args,kws,format,keywords,&a1,&a2,&a3,&a4)) goto onError;} +#define Py_KeywordGet5Args(keywords,format,a1,a2,a3,a4,a5) {if (!PyArg_ParseTupleAndKeywords(args,kws,format,keywords,&a1,&a2,&a3,&a4,&a5)) goto onError;} +#define Py_KeywordGet6Args(keywords,format,a1,a2,a3,a4,a5,a6) {if (!PyArg_ParseTupleAndKeywords(args,kws,format,keywords,&a1,&a2,&a3,&a4,&a5,&a6)) goto onError;} +#define Py_KeywordGet7Args(keywords,format,a1,a2,a3,a4,a5,a6,a7) {if (!PyArg_ParseTupleAndKeywords(args,kws,format,keywords,&a1,&a2,&a3,&a4,&a5,&a6,&a7)) goto onError;} +#define Py_KeywordGet8Args(keywords,format,a1,a2,a3,a4,a5,a6,a7,a8) {if (!PyArg_ParseTupleAndKeywords(args,kws,format,keywords,&a1,&a2,&a3,&a4,&a5,&a6,&a7,&a8)) goto onError;} + +/* New style macros fof functions supporting keywords -- the C + variable names are used as template for the keyword list, i.e. they + must match the Python keyword parameter names. + + Note that format strings with special parameters (e.g. "#s") are + not allowed since they would cause the keyword list to be out of + sync. + + The functions must be included in the method list using + Py_MethodWithKeywordsListEntry() and be declared as + Py_C_Function_WithKeywords(). + + Example: + + Py_C_Function_WithKeywords( + myfunction, + "myfunction(filename,dupkeys=0,filemode=0,sectorsize=512)\n\n" + "Returns a myobject" + ) + { + char *filename; + int sectorsize = 512; + int dupkeys = 0; + int filemode = 0; + + Py_KeywordsGet4Args("s|iii", + filename,dupkeys,filemode,sectorsize); + + return (PyObject *)myobject_New(filename, + filemode, + sectorsize, + dupkeys); + onError: + return NULL; + } + +*/ +#define Py_KeywordsGetArg(format,a1) {static char *kwslist[] = {#a1,NULL}; if (!PyArg_ParseTupleAndKeywords(args,kws,format,kwslist,&a1)) goto onError;} +#define Py_KeywordsGet2Args(format,a1,a2) {static char *kwslist[] = {#a1,#a2,NULL}; if (!PyArg_ParseTupleAndKeywords(args,kws,format,kwslist,&a1,&a2)) goto onError;} +#define Py_KeywordsGet3Args(format,a1,a2,a3) {static char *kwslist[] = {#a1,#a2,#a3,NULL}; if (!PyArg_ParseTupleAndKeywords(args,kws,format,kwslist,&a1,&a2,&a3)) goto onError;} +#define Py_KeywordsGet4Args(format,a1,a2,a3,a4) {static char *kwslist[] = {#a1,#a2,#a3,#a4,NULL}; if (!PyArg_ParseTupleAndKeywords(args,kws,format,kwslist,&a1,&a2,&a3,&a4)) goto onError;} +#define Py_KeywordsGet5Args(format,a1,a2,a3,a4,a5) {static char *kwslist[] = {#a1,#a2,#a3,#a4,#a5,NULL}; if (!PyArg_ParseTupleAndKeywords(args,kws,format,kwslist,&a1,&a2,&a3,&a4,&a5)) goto onError;} +#define Py_KeywordsGet6Args(format,a1,a2,a3,a4,a5,a6) {static char *kwslist[] = {#a1,#a2,#a3,#a4,#a5,#a6,NULL}; if (!PyArg_ParseTupleAndKeywords(args,kws,format,kwslist,&a1,&a2,&a3,&a4,&a5,&a6)) goto onError;} +#define Py_KeywordsGet7Args(format,a1,a2,a3,a4,a5,a6,a7) {static char *kwslist[] = {#a1,#a2,#a3,#a4,#a5,#a6,#a7,NULL}; if (!PyArg_ParseTupleAndKeywords(args,kws,format,kwslist,&a1,&a2,&a3,&a4,&a5,&a6,&a7)) goto onError;} +#define Py_KeywordsGet8Args(format,a1,a2,a3,a4,a5,a6,a7,a8) {static char *kwslist[] = 
{#a1,#a2,#a3,#a4,#a5,#a6,#a7,#a8,NULL}; if (!PyArg_ParseTupleAndKeywords(args,kws,format,kwslist,&a1,&a2,&a3,&a4,&a5,&a6,&a7,&a8)) goto onError;} + +/* --- Returning values to Python ----------------------------------------- */ + +/* XXX Don't always work: every time you have an 'O' in the BuildValue format + string, you need to DECREF the variable *after* the tuple has been + built !!! +*/ + +#define Py_ReturnNone() {Py_INCREF(Py_None);return Py_None;} +#define Py_ReturnTrue() {Py_INCREF(Py_True);return Py_True;} +#define Py_ReturnFalse() {Py_INCREF(Py_False);return Py_False;} +#define Py_ReturnArg(format,a1) return Py_BuildValue(format,a1); +#define Py_Return Py_ReturnArg +#define Py_Return2Args(format,a1,a2) return Py_BuildValue(format,a1,a2); +#define Py_Return2 Py_Return2Args +#define Py_Return3Args(format,a1,a2,a3) return Py_BuildValue(format,a1,a2,a3); +#define Py_Return3 Py_Return3Args +#define Py_Return4Args(format,a1,a2,a3) return Py_BuildValue(format,a1,a2,a3,a4); +#define Py_Return5Args(format,a1,a2,a3) return Py_BuildValue(format,a1,a2,a3,a4,a5); +#define Py_Return6Args(format,a1,a2,a3) return Py_BuildValue(format,a1,a2,a3,a4,a5,a6); +#define Py_Return7Args(format,a1,a2,a3) return Py_BuildValue(format,a1,a2,a3,a4,a5,a6,a7); + +/* Build values */ + +#define Py_BuildNone() Py_NONE +#define Py_Build(format,x) Py_BuildValue(format,x) +#define Py_Build2(format,x,y) Py_BuildValue(format,x,y) +#define Py_Build3(format,x,y,z) Py_BuildValue(format,x,y,z) + +/* --- Declaring Python builtin functions/methods ------------------------- */ + +/* Declare C function/method fct, having docstring docstr; may use vargargs */ +#define Py_C_Function(fct,docstr) \ + static char fct##_docstring[] = docstr;\ + static PyObject *fct(PyObject *self, PyObject *args) + +/* Declare C function/method fct, having keywords keywordsarray and a + docstring docstr; may use vargargs & keywords */ +#define Py_C_Function_WithKeywords(fct,docstr) \ + static char fct##_docstring[] = docstr;\ + static PyObject *fct(PyObject *self, PyObject *args, PyObject *kws) + +/* These declare: self -- instance pointer for methods, NULL for functions + args -- argument tuple + kws -- keywords dict (if applicable) + plus as statics: + _docstring -- the docstring as given + _keywords -- the keyword array as given + + Note: use the Py_GetArg macros for functions without keywords, + and Py_KeywordGetArg macros for functions with keywords +*/ + +/* --- Method list entries for builtin functions/methods ------------------ */ + +/* Add a C function/method cname to the module dict as pyname; no + doc-string */ +#define Py_MethodListEntryAny(pyname,cname) {pyname,(PyCFunction)cname,METH_VARARGS} + +/* Add a C function/method cname to the module dict as pyname; the + function can use varargs */ +#define Py_MethodListEntry(pyname,cname) {pyname,(PyCFunction)cname,METH_VARARGS,cname##_docstring} + +/* Add a C function/method cname to the module dict as pyname; the + function takes no args; in Python 2.3 a new flag was added for + these which implements the no args check in the interpreter + itself. */ +#ifdef METH_NOARGS +# define Py_MethodListEntryNoArgs(pyname,cname) {pyname,(PyCFunction)cname,METH_NOARGS,cname##_docstring} +#else +# define Py_MethodListEntryNoArgs(pyname,cname) {pyname,(PyCFunction)cname,0,cname##_docstring} +#endif + +/* Add a C function/method cname to the module dict as pyname; the + function takes one argument: the object is passed in directly + (without wrapping it into a tuple first), i.e. 
don't use + the Py_GetArg-macros or PyArg_ParseTuple(). */ +#define Py_MethodListEntrySingleArg(pyname,cname) {pyname,(PyCFunction)cname,0,cname##_docstring} + +/* Add a C function/method that uses keywords to the module dict */ +#define Py_MethodWithKeywordsListEntry(pyname,cname) {pyname,(PyCFunction)cname,METH_VARARGS | METH_KEYWORDS,cname##_docstring} + + +/* --- Sequence slicing --------------------------------------------------- */ + +/* Check a given slice and apply the usual rules for negative indices */ +#define Py_CheckSequenceSlice(len,start,stop) { \ + if (stop > len) \ + stop = len; \ + else { \ + if (stop < 0) \ + stop += len; \ + if (stop < 0) \ + stop = 0; \ + } \ + if (start < 0) { \ + start += len; \ + if (start < 0) \ + start = 0; \ + } \ + if (stop < start) \ + start = stop; \ + } + +/* --- Text macros -------------------------------------------------------- */ + +/* Check a given text slice and apply the usual rules for negative + indices */ +#define Py_CheckBufferSlice(textlen,start,stop) \ + Py_CheckSequenceSlice(textlen,start,stop) + +/* Dito for string objects */ +#define Py_CheckStringSlice(textobj,start,stop) \ + Py_CheckSequenceSlice(PyString_GET_SIZE(textobj),start,stop) + +/* For b/w compatibility */ +#define Py_CheckSlice(textobj,start,stop) \ + Py_CheckStringSlice(textobj,start,stop) + +/* Dito for Unicode objects */ +#ifdef PyUnicode_GET_SIZE +# define Py_CheckUnicodeSlice(unicode,start,stop) \ + Py_CheckSequenceSlice(PyUnicode_GET_SIZE(unicode),start,stop) +#endif + +/* This assumes that fixed is a constant char array; the strcmp + function is only called in case the attribute name length exceeds + 10 characters and the first 10 characters match; optimizing + compilers should eliminate any unused parts of this comparison + automatically. + + Note: The latest egcs compiler warns about the subscripts being out + of range for shorter fixed strings; since no code is generated for + those comparisons, these warning can safely be ignored. Still, they + are annoying. See the Py_StringsCompareEqual() macro below for a + way to work around this. + +*/ +#define Py_StringsCompareEqualEx(var,fixed,fixedsize) \ + (var[0] == fixed[0] && \ + (fixed[0] == 0 || \ + (fixedsize >= 1 && (var[1] == fixed[1] && \ + (fixed[1] == 0 || \ + (fixedsize >= 2 && (var[2] == fixed[2] && \ + (fixed[2] == 0 || \ + (fixedsize >= 3 && (var[3] == fixed[3] && \ + (fixed[3] == 0 || \ + (fixedsize >= 4 && (var[4] == fixed[4] && \ + (fixed[4] == 0 || \ + (fixedsize >= 5 && (var[5] == fixed[5] && \ + (fixed[5] == 0 || \ + (fixedsize >= 6 && (var[6] == fixed[6] && \ + (fixed[6] == 0 || \ + (fixedsize >= 7 && (var[7] == fixed[7] && \ + (fixed[7] == 0 || \ + (fixedsize >= 8 && (var[8] == fixed[8] && \ + (fixed[8] == 0 || \ + (fixedsize >= 9 && (var[9] == fixed[9] && \ + (fixed[9] == 0 || \ + (fixedsize >= 10 && \ + strcmp(&var[10],&fixed[10]) == 0 \ + )))))))))))))))))))))))))))))) + +/* This assumes that fixed is a constant char array. + + The appended string snippet is to shut up the warnings produced by + newer egcs/gcc compilers about offsets being outside bounds. + + Note that some compilers do the inlining by themselves or don't + like the above trick (OpenVMS is one such platform). For these we + simply use the standard way. 
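Py_CheckSequenceSlice() above is easy to mis-read because of the nested negative-index handling, so here is the same rule set written out in plain Python; this is a rendering of the macro for reference, not part of the library:

    def check_sequence_slice(length, start, stop):
        """Mirror of the Py_CheckSequenceSlice() rules."""
        if stop > length:
            stop = length
        else:
            if stop < 0:
                stop += length
            if stop < 0:
                stop = 0
        if start < 0:
            start += length
            if start < 0:
                start = 0
        if stop < start:
            start = stop
        return start, stop

    assert check_sequence_slice(10, -3, -1) == (7, 9)
    assert check_sequence_slice(10, 5, 100) == (5, 10)
    assert check_sequence_slice(10, 8, 2) == (2, 2)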
+ +*/ + +#ifndef __VMS +# define Py_StringsCompareEqual(var,fixed) \ + Py_StringsCompareEqualEx(var,fixed"\0\0\0\0\0\0\0\0\0\0",sizeof(fixed)) +#else +# define Py_StringsCompareEqual(var,fixed) (strcmp(var, fixed) == 0) +#endif + +/* Fast character set member check; set must be a "static unsigned + *char set" array of exactly 32 bytes length generated with + TextTools.set() */ +#define Py_CharInSet(chr,set) \ + (((unsigned char)(set)[(unsigned char)(chr) >> 3] & \ + (1 << ((unsigned char)(chr) & 7))) != 0) + +/* --- SWIG addons -------------------------------------------------------- */ + +/* Throw this error after having set the correct Python exception + using e.g. PyErr_SetString(); */ +#define mxSWIGError "mxSWIGError" + +/* EOF */ +#endif + diff -Nru simpleparse-2.1.0a1/simpleparse/stt/TextTools/mxTextTools/mxh.h simpleparse-2.2.0/simpleparse/stt/TextTools/mxTextTools/mxh.h --- simpleparse-2.1.0a1/simpleparse/stt/TextTools/mxTextTools/mxh.h 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/stt/TextTools/mxTextTools/mxh.h 2012-10-17 20:54:31.000000000 +0000 @@ -0,0 +1,60 @@ +#ifndef MXH_H +#define MXH_H + +/* + mxh.h -- Generic header file for all mx Extenstions + + This file should be included by every mx Extension header file + and the C file. + + Copyright (c) 2000, Marc-Andre Lemburg; mailto:mal@lemburg.com + Copyright (c) 2000-2002, eGenix.com Software GmbH; mailto:info@egenix.com + See the documentation for further copyright information or contact + the author. + +*/ + +/* + Macros to control export and import of DLL symbols. + + We use our own definitions since Python's don't allow specifying + both imported and exported symbols at the same time; these defines + haven't been thoroughly tested yet, patches are most welcome :-) + +*/ + +/* Macro to "mark" a symbol for DLL export */ + +#if (defined(_MSC_VER) && _MSC_VER > 850 \ + || defined(__MINGW32__) || defined(__CYGWIN) || defined(__BEOS__)) +# ifdef __cplusplus +# define MX_EXPORT(type) extern "C" type __declspec(dllexport) +# else +# define MX_EXPORT(type) extern type __declspec(dllexport) +# endif +#elif defined(__WATCOMC__) +# define MX_EXPORT(type) extern type __export +#elif defined(__IBMC__) +# define MX_EXPORT(type) extern type _Export +#else +# define MX_EXPORT(type) extern type +#endif + +/* Macro to "mark" a symbol for DLL import */ + +#if defined(__BORLANDC__) +# define MX_IMPORT(type) extern type __import +#elif (defined(_MSC_VER) && _MSC_VER > 850 \ + || defined(__MINGW32__) || defined(__CYGWIN) || defined(__BEOS__)) +# ifdef __cplusplus +# define MX_IMPORT(type) extern "C" type __declspec(dllimport) +# else +# define MX_IMPORT(type) extern type __declspec(dllimport) +# endif + +#else +# define MX_IMPORT(type) extern type +#endif + +/* EOF */ +#endif diff -Nru simpleparse-2.1.0a1/simpleparse/stt/TextTools/mxTextTools/mxpyapi.h simpleparse-2.2.0/simpleparse/stt/TextTools/mxTextTools/mxpyapi.h --- simpleparse-2.1.0a1/simpleparse/stt/TextTools/mxTextTools/mxpyapi.h 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/stt/TextTools/mxTextTools/mxpyapi.h 2015-11-11 18:42:23.000000000 +0000 @@ -0,0 +1,43 @@ +#ifndef MXPYAPI_H +#define MXPYAPI_H + +/* mxpyapi.h + + This header file includes some new APIs that are not available in + older API versions, yet are used by the mx-Extensions. 
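Py_CharInSet() above and the AllInSet/IsInSet commands earlier in lowlevelcommands.h assume the same 32-byte bit-mask layout produced by TextTools.set(): one bit per 8-bit character code, byte index code >> 3, bit index code & 7. A small self-contained sketch of that layout (make_charset here is a stand-in for illustration, not a library function):

    def make_charset(chars):
        """Build a 32-byte bit mask in the layout the engine expects."""
        mask = bytearray(32)
        for ch in chars:
            code = ord(ch)
            mask[code >> 3] |= 1 << (code & 7)
        return bytes(mask)

    def char_in_set(ch, mask):
        """Pure-Python rendering of the Py_CharInSet() test."""
        code = ord(ch)
        return bool(mask[code >> 3] & (1 << (code & 7)))

    vowels = make_charset("aeiou")
    assert char_in_set("e", vowels)
    assert not char_in_set("z", vowels)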
+ + Copyright (c) 2000, Marc-Andre Lemburg; mailto:mal@lemburg.com + Copyright (c) 2000-2003, eGenix.com Software GmbH; mailto:info@egenix.com + +*/ + +#if defined(PyUnicode_Check) && !defined(HAVE_UNICODE) +# define HAVE_UNICODE +#endif + +#if defined(HAVE_UNICODE) && !defined(Py_USING_UNICODE) +# undef HAVE_UNICODE +#endif + +#ifndef HAVE_UNICODE +# undef PyUnicode_Check +# define PyUnicode_Check(obj) 0 +#endif + +#if PY_MAJOR_VERSION >= 3 +#define PyInt_FromLong PyLong_FromLong +#define PyInt_Check PyLong_Check +#define PyInt_AS_LONG PyLong_AS_LONG + +#define PyString_FromStringAndSize PyBytes_FromStringAndSize +#define PyString_AsString PyBytes_AsString +#define PyString_FromString PyBytes_FromString +#define PyString_Check PyBytes_Check +#define PyString_FromFormat PyBytes_FromFormat +#define PyString_GET_SIZE PyBytes_GET_SIZE +#define PyString_AS_STRING PyBytes_AS_STRING +#define _PyString_Resize _PyBytes_Resize +#endif + +/* EOF */ +#endif diff -Nru simpleparse-2.1.0a1/simpleparse/stt/TextTools/mxTextTools/mxstdlib.h simpleparse-2.2.0/simpleparse/stt/TextTools/mxTextTools/mxstdlib.h --- simpleparse-2.1.0a1/simpleparse/stt/TextTools/mxTextTools/mxstdlib.h 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/stt/TextTools/mxTextTools/mxstdlib.h 2012-10-17 20:54:31.000000000 +0000 @@ -0,0 +1,208 @@ +#ifndef MXSTDLIB_H +#define MXSTDLIB_H + +/* Standard stuff I use often -- not Python specific + + Copyright (c) 2000, Marc-Andre Lemburg; mailto:mal@lemburg.com + Copyright (c) 2000-2002, eGenix.com Software GmbH; mailto:info@egenix.com + See the documentation for further copyright information or contact + the author. + + */ + +#include +#include +#include +#include +#include +#include +#ifdef HAVE_LIMITS_H +#include +#else +#ifndef INT_MAX +# define INT_MAX 2147483647 +#endif +#ifndef LONG_MAX +# define LONG_MAX INT_MAX +#endif +#endif + +/* --- My own macros for memory allocation... --------------------------- */ + +#ifdef MAL_MEM_DEBUG +# define newstruct(x) \ + (mxDebugPrintf("* malloc for struct "#x" (%s:%i)\n",__FILE__,__LINE__),\ + (x *)malloc(sizeof(x))) +# define cnewstruct(x) \ + (mxDebugPrintf("* calloc for struct "#x" (%s:%i)\n",c,__FILE__,__LINE__),\ + (x *)calloc(sizeof(x),1)) +# define new(x,c) \ + (mxDebugPrintf("* malloc for "#c"=%i '"#x"'s (%s:%i)\n",c,__FILE__,__LINE__),\ + (x *)malloc(sizeof(x)*(c))) +# define cnew(x,c) \ + (mxDebugPrintf("* calloc for "#c"=%i '"#x"'s (%s:%i)\n",c,__FILE__,__LINE__),\ + (x *)calloc((c),sizeof(x))) +# define resize(var,x,c) \ + (mxDebugPrintf("* realloc array "#var" ("#x") at %X to size "#c"=%i (%s:%i)\n",var,c,__FILE__,__LINE__),\ + (x *)realloc((void*)(var),sizeof(x)*(c))) +# define varresize(var,x,bytes) \ + (mxDebugPrintf("* realloc var "#var" ("#x") at %X to %i bytes (%s:%i)\n",var,bytes,__FILE__,__LINE__),\ + (x *)realloc((void*)(var),(bytes))) +# define free(x) \ + (mxDebugPrintf("* freeing "#x" at %X (%s:%i)\n",x,__FILE__,__LINE__),\ + free((void*)(x))) +#else +# define newstruct(x) ((x *)malloc(sizeof(x))) +# define cnewstruct(x) ((x *)calloc(sizeof(x),1)) +# define new(x,c) ((x *)malloc(sizeof(x)*(c))) +# define cnew(x,c) ((x *)calloc((c),sizeof(x))) +# define resize(var,x,c) ((x *)realloc((void*)(var),sizeof(x)*(c))) +# define varresize(var,x,bytes) ((x *)realloc((void*)(var),(bytes))) +# define free(x) free((void*)(x)) +#endif + +/* --- Debugging output ------------------------------------------------- */ + +/* Use the flag MAL_DEBUG to enable debug processing. 
+ + The flag MAL_DEBUG_WITH_PYTHON can be used to indicate that the + object file will be linked with Python, so we can use Python APIs + for the debug processing here. + +*/ +#ifdef MAL_DEBUG_WITH_PYTHON +# ifndef PYTHON_API_VERSION +# error "mx.h must be included when compiling with MAL_DEBUG_WITH_PYTHON" +# endif +# ifndef MAL_DEBUG +# define MAL_DEBUG +# endif +#else +# if defined(PYTHON_API_VERSION) && defined(MAL_DEBUG) +# define MAL_DEBUG_WITH_PYTHON +# endif +#endif + +/* Indicator for the availability of these interfaces: */ + +#define HAVE_MAL_DEBUG + +/* Name of the environment variable defining the log file name + to be used: */ + +#ifndef MAL_DEBUG_OUTPUTFILE_ENV_VARIABLE +# define MAL_DEBUG_OUTPUTFILE_ENV_VARIABLE "mxLogFile" +#endif + +/* File name to be used for debug logging (each object file using this + facility may set its own logging file) if no environment variable + is set: */ + +#ifndef MAL_DEBUG_OUTPUTFILE +# define MAL_DEBUG_OUTPUTFILE "mx.log" +#endif + +/* Name of the environment variable defining the log file prefix to be + used (e.g. to direct all log files into a separate directory): */ + +#ifndef MAL_DEBUG_OUTPUTFILEPREFIX_ENV_VARIABLE +# define MAL_DEBUG_OUTPUTFILEPREFIX_ENV_VARIABLE "mxLogFileDir" +#endif + +/* File name prefix to be used for log files, if no environment + variable is set: */ + +#ifndef MAL_DEBUG_OUTPUTFILEPREFIX +# define MAL_DEBUG_OUTPUTFILEPREFIX "" +#endif + +/* Log id to be used */ + +#ifndef MAL_DEBUG_LOGID +# define MAL_DEBUG_LOGID "New Log Session" +#endif + +/* Debug printf() API + + Output is written to a log file or stream. If the output file is + not yet open, the function will try to open the file as defined by + the environment or the program defines. The file remains open + until the program terminates. Subsequent changes to the environment + are not taken into account. + + The output file is deduced in the following way: + + 1. get the filename from the environment, revert to the predefined + value + + 2. get the filename prefix from the environment, revert to + the predefined value + + 3. if filename is one of "stderr" or "stdout" use the native + streams for output; otherwise try to open fileprefix + filename + reverting to stderr in case this fails. + + */ + +static +int mxDebugPrintf(const char *format, ...) +{ + return 1; +} + +#ifdef MAL_DEBUG + +# ifdef MAL_DEBUG_WITH_PYTHON +/* Use the Python debug flag to enable debugging output (python -d) */ +# define DPRINTF if (Py_DebugFlag) mxDebugPrintf +# define IF_DEBUGGING if (Py_DebugFlag) +# define DEBUGGING (Py_DebugFlag > 0) +# else + +/* Always output debugging information */ +# define DPRINTF mxDebugPrintf +# define IF_DEBUGGING +# define DEBUGGING (1) +# endif + +#else + +# ifndef _MSC_VER +/* This assumes that you are using an optimizing compiler which + eliminates the resulting debug code. */ +# define DPRINTF if (0) mxDebugPrintf +# define IF_DEBUGGING if (0) +# define DEBUGGING (0) +# else + +/* MSVC doesn't do a good job here, so we use a different approach. */ +# define DPRINTF 0 && mxDebugPrintf +# define IF_DEBUGGING if (0) +# define DEBUGGING (0) +# endif + +#endif + +/* --- Misc ------------------------------------------------------------- */ + +/* The usual bunch... 
*/ +#ifndef max +# define max(a,b) ((a>b)?(a):(b)) +#endif +#ifndef min +# define min(a,b) ((anumentries;\ + returnCode = NULL_CODE;\ + loopcount = -1;\ + loopstart = startPosition;\ + taglist_len = PyList_Size( taglist );\ +} + +/* Macro to reset tag-specific variables + +*/ +#define RESET_TAG_VARIABLES {\ + childStart = position;\ + childPosition = position;\ + childReturnCode = NULL_CODE;\ + childResults = NULL;\ +} +/* Macro to decode a tag-entry into local variables */ +#define DECODE_TAG {\ + mxTagTableEntry *entry;\ + entry = &table->entry[index];\ + command = entry->cmd;\ + flags = entry->flags;\ + match = entry->args;\ + failureJump = entry->jne;\ + successJump = entry->je;\ + tagobj = entry->tagobj;\ + if (tagobj == NULL) { tagobj = Py_None;}\ +} + +/* macro to push relevant local variables onto the stack and setup for child table + newTable becomes table, newResults becomes taglist + + This is currently only called in the Table/SubTable family of commands, + could be inlined there, but I find it cleaner to read here. +*/ +#define PUSH_STACK( newTable, newResults ) {\ + stackTemp = (recursive_stack_entry *) PyMem_Malloc( sizeof( recursive_stack_entry ));\ + stackTemp->parent = stackParent;\ + stackTemp->position = position;\ + stackTemp->startPosition = startPosition;\ + stackTemp->table = table;\ + stackTemp->index = index;\ + stackTemp->childStart = childStart;\ + stackTemp->resultsLength = taglist_len;\ + stackTemp->results = taglist;\ + \ + stackParent = stackTemp;\ + childReturnCode = PENDING_CODE;\ + \ + startPosition = position;\ + table = (mxTagTableObject *) newTable;\ + taglist = newResults;\ +} +#define POP_STACK {\ + if (stackParent) {\ + childStart = stackParent->childStart;\ + childPosition = position;\ + position = stackParent->position;\ + \ + startPosition = stackParent->startPosition;\ + \ + childResults = taglist;\ + taglist_len = stackParent->resultsLength;\ + taglist = stackParent->results;\ + if (table != stackParent->table ) { Py_DECREF( table ); }\ + table = stackParent->table;\ + table_len = table->numentries;\ + index = stackParent->index;\ + \ + stackTemp = stackParent->parent;\ + PyMem_Free( stackParent );\ + stackParent = stackTemp;\ + stackTemp = NULL;\ + \ + childReturnCode = returnCode;\ + returnCode = NULL_CODE;\ + }\ +} + + +#endif + +/* mxTextTools_TaggingEngine(): a table driven parser engine + + - return codes: returnCode = 2: match ok; returnCode = 1: match failed; returnCode = 0: error + - doesn't check type of passed arguments ! + - doesn't increment reference counts of passed objects ! 
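To make the PUSH_STACK / POP_STACK bookkeeping above easier to follow,
here is a hedged Python sketch of the same idea: before descending into
a child table the engine saves its whole-table locals on a linked stack
entry, and popping restores them once the child table has finished (the
field names mirror the C struct, the code itself is only illustrative):

    class StackEntry:
        def __init__(self, parent, position, start_position, table,
                     index, child_start, results_length, results):
            self.parent = parent                  # next entry to pop
            self.position = position
            self.start_position = start_position
            self.table = table
            self.index = index
            self.child_start = child_start
            self.results_length = results_length  # taglist length on entry
            self.results = results                # taglist in use

    def push(state, new_table, new_results):
        state.parent = StackEntry(state.parent, state.position,
                                  state.start_position, state.table,
                                  state.index, state.child_start,
                                  len(state.results), state.results)
        state.start_position = state.position     # child parses from here
        state.table, state.results = new_table, new_results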
+*/ + + + +int TE_ENGINE_API( + PyObject *textobj, + Py_ssize_t sliceleft, + Py_ssize_t sliceright, + mxTagTableObject *table, + PyObject *taglist, + PyObject *context, + Py_ssize_t *next +) { + TE_CHAR *text = NULL; /* Pointer to the text object's data */ + + /* local variables pushed into stack on recurse */ + /* whole-table variables */ + Py_ssize_t position = sliceleft; /* current (head) position in text for whole table */ + Py_ssize_t startPosition = sliceleft; /* start position for current tag */ + Py_ssize_t table_len = table->numentries; /* table length */ + short returnCode = NULL_CODE; /* return code: -1 not set, 0 error, 1 + not ok, 2 ok */ + Py_ssize_t index=0; /* index of current table entry */ + Py_ssize_t taglist_len = PyList_Size( taglist ); + + + /* variables tracking status of the current tag */ + register short childReturnCode = NULL_CODE; /* the current child's return code value */ + Py_ssize_t childStart = startPosition; + register Py_ssize_t childPosition = startPosition; + PyObject *childResults = NULL; /* store's the current child's results (for table children) */ + int flags=0; /* flags set in command */ + int command=0; /* command */ + int failureJump=0; /* rel. jump distance on 'not matched', what should the default be? */ + int successJump=1; /* dito on 'matched', what should the default be? */ + PyObject *match=NULL; /* matching parameter */ + int loopcount = -1; /* loop counter */ + Py_ssize_t loopstart = startPosition; /* loop start position */ + PyObject *tagobj = NULL; + + + /* parentTable is our nearest parent, i.e. the next item to pop + off the processing stack. We copied our local variables to it + before starting a child table, and will copy back from it when + we finish the child table. It's normally NULL + */ + recursive_stack_entry * stackParent = NULL; + recursive_stack_entry * stackTemp = NULL; /* just temporary storage for parent pointers */ + + /* Error-management variables */ + PyObject * errorType = NULL; + PyObject * errorMessage = NULL; + + /* Initialise the buffer + + Here is where we will add memory-mapped file support I think... 
+ + expand the TE_STRING macros to check for mmap file objects + (only for str-type) and to access their values appropriately + f = open('c:\\temp\\test.mem', 'r') + buffer = mmap.mmap( f.fileno(), 0, access = mmap.ACCESS_READ ) + + */ + if (!TE_STRING_CHECK(textobj)) { + returnCode = ERROR_CODE; + errorType = PyExc_TypeError; + errorMessage = PyString_FromFormat( + "Expected a string or unicode object to parse: found %.50s", + Py_TYPE(textobj)->tp_name + ); + } else { + text = TE_STRING_AS_STRING(textobj); + if (text == NULL) { + returnCode = ERROR_CODE; + } + } + + while (1) { + /* this loop processes a whole table */ + while ( + (index < table_len) & + (returnCode == NULL_CODE) & + (index >= 0) + ) { + DPRINTF( "index %i\n", index ); + DECODE_TAG + if (childReturnCode == NULL_CODE ) { + /* if we are not continuing processing of the child + from a previous iteration we need to unpack the + child into local variables + */ + RESET_TAG_VARIABLES + childStart = position; + childPosition = position; + + } + if (command < MATCH_MAX_LOWLEVEL) { +#include "lowlevelcommands.h" + } else { + switch (command) { +/* Jumps & special commands */ +#include "speccommands.h" +/* non-table-recursion high-level stuff */ +#include "highcommands.h" +/* the recursive table commands */ +#include "recursecommands.h" + default: + { + childReturnCode = ERROR_CODE; + errorType = PyExc_ValueError; + errorMessage = PyString_FromFormat( + "Unrecognised command code %i", + command + ); + } + } + } + /* we're done a single tag, process partial results for the current child + + This is a major re-structuring point. Previously + all of this was scattered around (and duplicated among) + the various command and command-group clauses. + + There also used to be a function call to handle the + append/call functions. That's now handled inline + + */ + /* sanity check wanted by Marc-André for skip-before-buffer */ + if (childPosition < 0) { + childReturnCode = ERROR_CODE; + errorType = PyExc_TypeError; + errorMessage = PyString_FromFormat( + "tagobj (type %.50s) table entry %d moved/skipped beyond start of text (to position %d)", + Py_TYPE(tagobj)->tp_name, + (unsigned int)index, + (unsigned int)childPosition + ); + } + DPRINTF( "switch on return code %i\n", childReturnCode ); + switch(childReturnCode) { + case NULL_CODE: + case SUCCESS_CODE: + /* childReturnCode wasn't set or we positively matched + + positions are always: + childStart, childPosition + sub-results are: + childResults + unless childResults is taglist + in which case we use Py_None for the tag's children + unless childResults is NULL + in which case we create an empty list object + + we call: + tagobj == Py_None : + do nothing... + + [ result tuple needed ] + CallTag: + entry->tagobj( resultTuple ) + AppendToTagobj: + entry->tagobj.append( resultTuple ) + General Case: + taglist.append( resultTuple ) + + AppendMatch: + taglist.append( text[childStart:childPosition] ) + AppendTagobj: + taglist.append( entry->tagobj ) + + if LookAhead is specified: + childPosition is set to childStart before continuing + + finally we set position = childPosition + */ + { + PyObject * objectToCall = NULL; + PyObject * objectCallResult = NULL; + int releaseCallObject = 0; + int releaseChildResults = 0; + int releaseParameter = 1; + PyObject * parameter = NULL; + DPRINTF( "finishing success-code or null \n" ); + + if (tagobj == Py_None ) { + /* XXX note: this short-circuits around "AppendTagobj" flagged items which + specified tagobj == None... don't know if that's wanted or not. 
Similarly + doesn't report AppendMatch's. Not sure what's appropriate there either. + */ + DPRINTF( "tagobj was none\n" ); + DPRINTF( "Matched %i:%i but result not saved", childStart, childPosition ); + } else { + /* get the callable object */ + /* normally it's taglist.append, do the exceptions first */ + DPRINTF( "tagobj non-None, finding callable\n" ); + if (flags & MATCH_CALLTAG) { + /* want the tag itself */ + objectToCall = tagobj; + } else if (flags & MATCH_APPENDTAG) { + /* AppendToTagobj -> want the tag's append method */ + DPRINTF( "append to tag obj\n" ); + objectToCall = PyObject_GetAttrString( tagobj, "append" ); + DPRINTF( "got object\n"); + if (objectToCall == NULL) { + DPRINTF( "got invalid object\n"); + returnCode = ERROR_CODE; + errorType = PyExc_AttributeError; + errorMessage = PyString_FromFormat( + "tagobj (type %.50s) for table entry %d (flags include AppendTag) doesn't have an append method", + Py_TYPE(tagobj)->tp_name, + (unsigned int)index + ); + } else { + DPRINTF( "got valid object\n"); + releaseCallObject = 1; + } + } else { + DPRINTF( "appending to tag-list\n" ); + /* append of the taglist, which we know exists, because it's a list + We optimise this to use the raw List API + */ + objectToCall = NULL; /*PyObject_GetAttrString( taglist, "append" );*/ + } + if (returnCode == NULL_CODE && objectToCall && PyCallable_Check(objectToCall)==0) { + /* object to call isn't callable */ + DPRINTF( "object not callable\n" ); + returnCode = ERROR_CODE; + errorType = PyExc_TypeError; + errorMessage = PyString_FromFormat( + "The object to call type(%.50s) for table entry %d isn't callable", + Py_TYPE(objectToCall)->tp_name, + (unsigned int)index + ); + } + if (returnCode == NULL_CODE) { + /* get the parameter with which to call */ + /* normally it's a result tuple, do exceptions first */ + DPRINTF( "getting parameter\n" ); + if (flags & MATCH_APPENDMATCH) { + /* XXX need to do bounds-checking here + so that: + childStart >= sliceleft + childPosition >= sliceleft + childPosition <= sliceright + */ + /* MATCH_APPENDMATCH cannot occur with any + other flag (makes no sense) so objectToCall + _must_ be the taglist, and we just want to append + the string, not a tuple wrapping the string. That is, + everywhere else we use tuples, here we don't + */ + parameter = TE_STRING_FROM_STRING( + TE_STRING_AS_STRING(textobj) + childStart, + childPosition - childStart + ); + if (parameter == NULL) { + /* error occured getting parameter, report the exception */ + returnCode = ERROR_CODE; + } + } else if ( flags & MATCH_APPENDTAGOBJ) { + /* append the tagobj itself to the results list */ + if (tagobj == NULL) { + parameter = Py_None; + } else { + parameter = tagobj; + } + releaseParameter = 0; + } else { + /* need to know what the child-list is to build resultsTuple + if childResults is non-null and not taglist use it + if childResults == taglist, use Py_None + otherwise use Py_None ( originally we created a new empty list object, that was wrong :) ). 
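Taken together, the reporting rules described above reduce to a small
dispatch. A Python sketch (the boolean arguments stand in for the
MATCH_* flag bits tested in the C code; the helper itself is
illustrative only):

    def report(taglist, text, tagobj, child_start, child_pos, children,
               call_tag=False, append_to_tagobj=False,
               append_match=False, append_tagobj=False):
        if tagobj is None:
            return                                   # matched, nothing recorded
        if append_match:                             # MATCH_APPENDMATCH
            taglist.append(text[child_start:child_pos])
        elif append_tagobj:                          # MATCH_APPENDTAGOBJ
            taglist.append(tagobj)
        elif call_tag:                               # MATCH_CALLTAG
            tagobj(taglist, text, child_start, child_pos, children)
        elif append_to_tagobj:                       # MATCH_APPENDTAG
            tagobj.append((None, child_start, child_pos, children))
        else:                                        # default case
            taglist.append((tagobj, child_start, child_pos, children))

If LookAhead was requested the engine afterwards rewinds position to
child_start instead of advancing to child_pos.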
+ */ + if (childResults == taglist) { + childResults = Py_None ; + } else if (childResults != NULL) { + /* exists already, with a reference from PUSH's creation */ + releaseChildResults = 1; + } else { + /* turns out mxTextTools declares the return value to be + None or [], using None is far more efficient, so I've made + the code use it here */ + childResults = Py_None; + releaseChildResults = 0; /* we aren't increfing it locally */ + } + if (childResults == NULL || tagobj == NULL) { + returnCode = ERROR_CODE; + } else { + if (flags & MATCH_CALLTAG) { + parameter = Py_BuildValue( "OOiiO", taglist, textobj, childStart, childPosition, childResults ); + } else if (flags & MATCH_APPENDTAG) { + /* AppendToTagobj -> want to call append with a 4-tuple of values, so parameter needs to be ((x,y,z,w),) */ + /* XXX can't get the darn thing to accept "((OiiO))" :( */ + parameter = Py_BuildValue( + "((OiiO))", + Py_None, + childStart, + childPosition, + childResults + ); + } else { + /* either we are calling a method that requires the 4 args, or we're appending the 4-tuple to a list */ + parameter = Py_BuildValue( "OiiO", tagobj, childStart, childPosition, childResults ); + } + if (parameter == NULL) { + returnCode = ERROR_CODE; + } + } + } + DPRINTF( "done getting parameter\n" ); + if (parameter == NULL && returnCode == ERROR_CODE && errorType == NULL) { + errorType = PyExc_SystemError; + /* following may fail, as we may have run out of memory */ + errorMessage = PyString_FromFormat( + "Unable to build return-value tuple" + ); + } + /* now have both object and parameter and object is callable */ + if (returnCode == NULL_CODE) { + /* no errors yet */ + DPRINTF( "doing call\n" ); + if (objectToCall) { + DPRINTF( " object call\n" ); + /* explicit object to call */ + Py_INCREF( objectToCall ); + Py_INCREF( parameter ); + DPRINTF( " lock released\n" ); + objectCallResult = PyEval_CallObject( objectToCall, parameter ); + DPRINTF( " call finished\n" ); + Py_DECREF( objectToCall ); + Py_DECREF( parameter ); + DPRINTF( " lock acquired\n" ); + if (objectCallResult == NULL) { + DPRINTF( " null result\n" ); + returnCode = ERROR_CODE; + /* exception is already there, should alter error-handler to check for it */ + } else { + DPRINTF( " non-null result, decrefing\n" ); + Py_DECREF( objectCallResult ); + DPRINTF( " decrefd\n" ); + } + objectCallResult = NULL; + } else { + /* list steals reference */ + DPRINTF( " list append\n" ); + if (PyList_Append( taglist, parameter ) == -1) { + returnCode = ERROR_CODE; + /* list didn't steal ref yet */ + errorType = PyExc_SystemError; + /* following is likely to fail, as we've likely run out of memory */ + errorMessage = PyString_FromFormat( + "Unable to append result tuple to result list!" 
+ ); + } + } + } + } + DPRINTF( "checking whether to release object\n" ); + if (releaseCallObject) { + Py_DECREF( objectToCall ); + } + objectToCall = NULL; + releaseCallObject = 0; + + if (releaseChildResults) { + Py_DECREF( childResults ); + } + childResults = NULL; + releaseChildResults = 0; + if (releaseParameter && parameter ) { + Py_DECREF( parameter ); + } + parameter = NULL; + releaseParameter = 1; + } /* ends the else clause for reporting a result */ + /* reset for lookahead */ + if (flags & MATCH_LOOKAHEAD) { + position = childStart; + } else { + position = childPosition; + } + index += successJump; + DPRINTF( "finished success-handler code\n" ); + break; + } + case FAILURE_CODE: + /* failed, if failure jump is default, should set table returnCode */ + if (childResults) { + if (childResults != taglist) { + /* different list, decref it since we won't be using it any more */ + Py_DECREF( childResults ); + } + childResults = NULL; + } + /* XXX possible (eventual) logic error here? + + fail with jump of 0 might work in certain cases where the + "parsing" is actually occuring outside of the current buffer + (i.e. a side-effect-based parsing node that fails X times before + finally succeeding). + + Don't see anything in current commands that can cause a problem + but we may need to make this an explicitly watched idea, rather + than a consequence of the child failing with a 0 failureJump value. + */ + position = childStart; + if (failureJump == 0) { + returnCode = 1; + } else { + index += failureJump; + } + break; + case PENDING_CODE: + /* the child tag hasn't begun parsing, this was a + recursive-tag-start loop pass. PENDING_CODE is set + by the stack push operation + */ + break; + case ERROR_CODE: + { + /* explicit error encountered while processing this child + + Handle this as gracefully as possible, potentially triggering + huge sets of operations, but therefore needing to be very careful + about system-level errors (such as memory errors). + + 1) Signal whole table as err-d + 2) Record any extra values for the error message? + */ + returnCode = ERROR_CODE; + break; + } + default: + { + /* what error should be raised when an un-recognised return code is generated? */ + returnCode = ERROR_CODE; + errorType = PyExc_SystemError; + errorMessage = PyString_FromFormat( + "An unknown child return code %i was generated by tag-table item %d", + childReturnCode, + (unsigned int)index + ); + } + } + childReturnCode = NULL_CODE; + /* single entry processing loop complete */ + } + /* we're done the table, figure out what to do. */ + if (returnCode == NULL_CODE) { + /* no explicit return code was set, but done table: + + index went beyond table_len (>=table_len) -> success + index moved before table start (<= 0) -> failure + */ + if (index >= table_len) { + /* success */ + returnCode = SUCCESS_CODE; + } else if (position >= sliceright) { + /* EOF while parsing, special type of failure + + Eventually allow for returning the whole parse-stack + for restarting the parser from a particular point. 
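Condensing the end-of-table handling above (and the failure clean-up
that follows just below) into a Python sketch, using the documented
return codes 2 = matched and 1 = failed; the helper name is
illustrative:

    SUCCESS_CODE, FAILURE_CODE = 2, 1

    def finish_table(index, table_len, taglist, taglist_len,
                     position, start_position):
        if index >= table_len:
            return SUCCESS_CODE, position       # walked off the end: matched
        del taglist[taglist_len:]               # failure: drop partial results
        return FAILURE_CODE, start_position     # ... and rewind the position

Running out of text (position >= sliceright) and jumping before the
first table entry are both treated as ordinary failures here.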
+ */ + /*returnCode = EOF_CODE;*/ + returnCode = FAILURE_CODE; + } else if (index < 0) { + /* explicit jump before table */ + returnCode = FAILURE_CODE; + } else { + returnCode = FAILURE_CODE; + } + } + if (returnCode == FAILURE_CODE) { + /* truncate result list */ + if (PyList_SetSlice( + taglist, + taglist_len, + PyList_Size(taglist), + NULL) + ) { + returnCode = ERROR_CODE; + errorMessage = PyString_FromFormat( + "Unable to truncate list object (likely tagging engine error) type(%.50s)", + Py_TYPE(taglist)->tp_name + ); + } + /* reset position */ + position = startPosition; + } + if (returnCode == ERROR_CODE) { + /* + DO_FANCY_ERROR_REPORTING( ); + + This is where we will do the user-triggered error reporting + (as well as reporting low-level errors such as memory/type/value). + + We have 3 values possibly available: + errorType -> PyObject * to current error class (or NULL) + if it is a MemoryError: + + Jettison some ballast then attempt to return a short + message. Need to create this ballast somewhere for that + to work. + + if is any other error class: + + create the error object and raise it + + decorate it with details: + + current table (need to incref to keep alive) + current index + current position + childStart + childPosition + + if it is simpleparse.stt.TextTools.ParsingError: + (triggered by the user in their grammar) + + create a list of non-None parent tagobjs (a stack + report) and add it to the object + + + + + + 3) Build an actual error object if possible? + 4) Report the parent hierarchy of the failure point + 5) + */ + char * msg = NULL; + if (errorMessage && errorType) { + /* we only report our own error if we've got all the information for it + + XXX Need to check that we don't have cases that are just setting type + */ + msg = PyString_AsString( errorMessage); + PyErr_SetString( errorType, msg ); + Py_DECREF( errorMessage ); + } + + + + /* need to free the whole stack at once */ + while (stackParent != NULL) { + /* this is inefficient, should do it all-in-one-go without copying values back + save for startPosition and returnCode in the last item*/ + POP_STACK + /* need to clean up all INCREF'd objects as we go... */ + if (childResults != taglist) { + /* different list, decref it since we won't be using it any more */ + Py_DECREF( childResults ); + } + childResults = NULL; + } + *next = startPosition; + return 0; + } else { + if (stackParent != NULL) { + /* pop stack also sets the childReturnCode for us... */ + POP_STACK + } else { + /* this was the root table, + return the final results */ + if (returnCode == FAILURE_CODE) { + /* there is a clause in the docs for tag that says + this will return the "error position" for the table. 
+ That requires reporting childPosition for the the + last-matched position */ + *next = childPosition; + } else { + *next = position; + } + return returnCode; + } + } + } /* end of infinite loop */ +} + diff -Nru simpleparse-2.1.0a1/simpleparse/stt/TextTools/mxTextTools/mxTextTools.c simpleparse-2.2.0/simpleparse/stt/TextTools/mxTextTools/mxTextTools.c --- simpleparse-2.1.0a1/simpleparse/stt/TextTools/mxTextTools/mxTextTools.c 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/stt/TextTools/mxTextTools/mxTextTools.c 2015-11-11 18:42:23.000000000 +0000 @@ -0,0 +1,5179 @@ +/* + mxTextTools -- Fast text manipulation routines + + Copyright (c) 2000, Marc-Andre Lemburg; mailto:mal@lemburg.com + Copyright (c) 2000-2002, eGenix.com Software GmbH; mailto:info@egenix.com +*/ + +/* We want all our symbols to be exported */ +#ifndef MX_BUILDING_MXTEXTTOOLS +#define MX_BUILDING_MXTEXTTOOLS +#endif + +/* Logging file used by debugging facility */ +#ifndef MAL_DEBUG_OUTPUTFILE +# define MAL_DEBUG_OUTPUTFILE "mxTextTools.log" +#endif + +#include "mx.h" +#include "mxTextTools.h" +#include "structmember.h" +#include + +#define VERSION "2.1.0" + +/* Initial list size used by e.g. setsplit(), setsplitx(),... */ +#define INITIAL_LIST_SIZE 64 + +/* Maximum TagTable cache size. If this limit is reached, the cache + is cleared to make room for new compile TagTables. */ +#define MAX_TAGTABLES_CACHE_SIZE 100 + +/* Define this to enable the copy-protocol (__copy__, __deepcopy__) */ +#define COPY_PROTOCOL + +/* Convenience macro for reducing clutter */ +#define ADD_INT_CONSTANT(name, value) \ + if (PyModule_AddIntConstant(module, name, value) < 0) \ + return NULL; + +/* --- module doc-string -------------------------------------------------- */ + +PyDoc_STRVAR(Module_docstring, + + MXTEXTTOOLS_MODULE" -- Tools for fast text processing. Version "VERSION"\n\n" + + "Copyright (c) 1997-2000, Marc-Andre Lemburg; mailto:mal@lemburg.com\n" + "Copyright (c) 2000-2002, eGenix.com Software GmbH; mailto:info@egenix.com\n\n" + "Copyright (c) 2003-2006, Mike Fletcher; mailto:mcfletch@vrplumber.com\n\n" + + " All Rights Reserved\n\n" + "See the documentation for further information on copyrights,\n" + "or contact the author.") +; + +/* --- internal macros ---------------------------------------------------- */ + +/* --- module globals ----------------------------------------------------- */ + +/* Translation strings for the 8-bit versions of lower() and upper() */ +static PyObject *mx_ToUpper; +static PyObject *mx_ToLower; + +static PyObject *mxTextTools_Error; /* mxTextTools specific error */ + +static PyObject *mxTextTools_TagTables; /* TagTable cache dictionary */ + +/* Flag telling us whether the module was initialized or not. */ +static int mxTextTools_Initialized = 0; + +/* --- forward declarations ----------------------------------------------- */ + +/* --- module helper ------------------------------------------------------ */ + +static +PyObject *mxTextTools_ToUpper(void) +{ + char tr[256]; + Py_ssize_t i; + + for (i = 0; i < 256; i++) + tr[i] = toupper((char)i); + return PyString_FromStringAndSize(tr,sizeof(tr)); +} + +static +PyObject *mxTextTools_ToLower(void) +{ + char tr[256]; + Py_ssize_t i; + + for (i = 0; i < 256; i++) + tr[i] = tolower((char)i); + return PyString_FromStringAndSize(tr,sizeof(tr)); +} + +/* Create an exception object, insert it into the module dictionary + under the given name and return the object pointer; this is NULL in + case an error occurred. 
base can be given to indicate the base + object to be used by the exception object. It should be NULL + otherwise */ + +/* --- module interface --------------------------------------------------- */ + +/* --- Text Search Object ----------------------------------------------*/ + +/* allocation */ + +static +PyObject *mxTextSearch_New(PyObject *match, + PyObject *translate, + int algorithm) +{ + mxTextSearchObject *so; + + so = PyObject_NEW(mxTextSearchObject, &mxTextSearch_Type); + if (so == NULL) + return NULL; + so->data = NULL; + so->translate = NULL; + so->match = NULL; + + Py_INCREF(match); + so->match = match; + + if (translate == Py_None) + translate = NULL; + else if (translate) { + Py_Assert(PyString_Check(translate), + PyExc_TypeError, + "translate table must be a string"); + Py_Assert(PyString_GET_SIZE(translate) == 256, + PyExc_TypeError, + "translate string must have exactly 256 chars"); + Py_INCREF(translate); + } + so->translate = translate; + + /* Init algorithm */ + so->algorithm = algorithm; + switch (algorithm) { + + case MXTEXTSEARCH_BOYERMOORE: + Py_Assert(PyString_Check(match), + PyExc_TypeError, + "match must be a string for Boyer-Moore"); + so->data = bm_init(PyString_AS_STRING(match), + PyString_GET_SIZE(match)); + Py_Assert(so->data != NULL, + PyExc_TypeError, + "error initializing the search object"); + break; + + case MXTEXTSEARCH_TRIVIAL: + Py_Assert(PyString_Check(match) || PyUnicode_Check(match), + PyExc_TypeError, + "match must be a string or unicode"); + Py_Assert(so->translate == NULL, + PyExc_TypeError, + "trivial search algorithm does not support translate"); + break; + + default: + Py_Error(PyExc_ValueError, + "unknown or unsupported algorithm"); + + } + return (PyObject *)so; + + onError: + Py_DECREF(so); + return NULL; +} + +Py_C_Function_WithKeywords( + mxTextSearch_TextSearch, + "TextSearch(match[,translate=None,algorithm=default_algorithm])\n\n" + "Create a substring search object for the string match;\n" + "translate is an optional translate-string like the one used\n" + "in the module re." + ) +{ + PyObject *match = 0; + PyObject *translate = 0; + int algorithm = -424242; + + Py_KeywordsGet3Args("O|Oi:TextSearch",match,translate,algorithm); + + if (algorithm == -424242) { + if (PyUnicode_Check(match)) + algorithm = MXTEXTSEARCH_TRIVIAL; + else + algorithm = MXTEXTSEARCH_BOYERMOORE; + } + return mxTextSearch_New(match, translate, algorithm); + + onError: + return NULL; +} + +static +void mxTextSearch_Free(mxTextSearchObject *so) +{ + if (so->data) { + switch (so->algorithm) { + + case MXTEXTSEARCH_BOYERMOORE: + bm_free(so->data); + break; + + case MXTEXTSEARCH_TRIVIAL: + break; + + } + } + Py_XDECREF(so->match); + Py_XDECREF(so->translate); + PyObject_Del(so); +} + +/* C APIs */ + +#define so ((mxTextSearchObject *)self) + +/* Get the match length from an TextSearch object or -1 in case of an + error. 
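Seen from Python, the constructor above defaults to Boyer-Moore for
8-bit match strings and to the trivial algorithm for Unicode. A hedged
usage sketch (the import path follows the one used elsewhere in this
package):

    from simpleparse.stt.TextTools.TextTools import TextSearch

    ts = TextSearch('needle')        # 8-bit match string -> Boyer-Moore
    some_text = 'looking for a needle here'
    l, r = ts.search(some_text)      # slice of the first hit, or (start, start)
                                     # when nothing is found (see .search below)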
*/ + +Py_ssize_t mxTextSearch_MatchLength(PyObject *self) +{ + Py_Assert(mxTextSearch_Check(self), + PyExc_TypeError, + "expected a TextSearch object"); + + switch (so->algorithm) { + + case MXTEXTSEARCH_BOYERMOORE: + return BM_MATCH_LEN(so->data); + break; + + case MXTEXTSEARCH_TRIVIAL: + if (PyString_Check(so->match)) + return PyString_GET_SIZE(so->match); +#ifdef HAVE_UNICODE + else if (PyUnicode_Check(so->match)) + return PyUnicode_GET_SIZE(so->match); +#endif + break; + + } + + Py_Error(mxTextTools_Error, + "internal error"); + + onError: + return -1; +} + +static +Py_ssize_t trivial_search(const char *text, + Py_ssize_t start, + Py_ssize_t stop, + const char *match, + Py_ssize_t match_len) +{ + Py_ssize_t ml1 = match_len - 1; + register const char *tx = &text[start]; + register Py_ssize_t x = start; + + if (ml1 < 0) + return start; + + /* Brute-force method; from right to left */ + for (;;) { + register Py_ssize_t j = ml1; + register const char *mj = &match[j]; + + if (x + j >= stop) + /* reached eof: no match */ + return start; + + /* scan from right to left */ + for (tx += j; j >= 0 && *tx == *mj; + tx--, mj--, j--) ; + + if (j < 0) { + /* found */ + x += ml1 + 1; + return x; + } + /* not found: rewind and advance one char */ + tx -= j - 1; + x++; + } + return start; +} + +#ifdef HAVE_UNICODE +static +Py_ssize_t trivial_unicode_search(const Py_UNICODE *text, + Py_ssize_t start, + Py_ssize_t stop, + const Py_UNICODE *match, + Py_ssize_t match_len) +{ + Py_ssize_t ml1 = match_len - 1; + register const Py_UNICODE *tx = &text[start]; + register Py_ssize_t x = start; + + if (ml1 < 0) + return start; + + /* Brute-force method; from right to left */ + for (;;) { + register Py_ssize_t j = ml1; + register const Py_UNICODE *mj = &match[j]; + + if (x + j >= stop) + /* reached eof: no match */ + return start; + + /* scan from right to left */ + for (tx += j; j >= 0 && *tx == *mj; + tx--, mj--, j--) ; + + if (j < 0) { + /* found */ + x += ml1 + 1; + return x; + } + /* not found: rewind and advance one char */ + tx -= j - 1; + x++; + } + return start; +} +#endif + +/* Search for the match in text[start:stop]. + + Returns 1 in case a match was found and sets sliceleft, sliceright + to the matching slice. + + Returns 0 in case no match was found and -1 in case of an error. + +*/ + +Py_ssize_t mxTextSearch_SearchBuffer(PyObject *self, + char *text, + Py_ssize_t start, + Py_ssize_t stop, + Py_ssize_t *sliceleft, + Py_ssize_t *sliceright) +{ + Py_ssize_t nextpos; + Py_ssize_t match_len; + + Py_Assert(mxTextSearch_Check(self), + PyExc_TypeError, + "expected a TextSearch object"); + + switch (so->algorithm) { + + case MXTEXTSEARCH_BOYERMOORE: + if (so->translate) { + /* search with translate table */ + nextpos = bm_tr_search((mxbmse_data *)so->data, + text, + start, + stop, + PyString_AS_STRING(so->translate)); + } + else { + /* exact search */ + nextpos = bm_search((mxbmse_data *)so->data, + text, + start, + stop); + } + match_len = BM_MATCH_LEN(so->data); + break; + + case MXTEXTSEARCH_TRIVIAL: + { + const char *match; + + if (PyString_Check(so->match)) { + match = PyString_AS_STRING(so->match); + match_len = PyString_GET_SIZE(so->match); + } + else if (PyObject_AsCharBuffer(so->match, &match, &match_len)) + goto onError; + nextpos = trivial_search(text, + start, + stop, + match, + match_len); + } + break; + + default: + Py_Error(mxTextTools_Error, + "unknown algorithm type in mxTextSearch_SearchBuffer"); + + } + /* Found ? 
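The brute-force helper above reads more naturally in Python; a
behaviourally equivalent sketch (it returns the index just past the
match, or start when nothing is found):

    def trivial_search(text, start, stop, match):
        m = len(match)
        x = start
        while x + m <= stop:             # room left for a complete match?
            if text[x:x + m] == match:
                return x + m             # position just past the match
            x += 1
        return start                     # not found

The C version scans each candidate window right to left, but the result
is the same.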
*/ + if (nextpos != start) { + if (sliceleft) + *sliceleft = nextpos - match_len; + if (sliceright) + *sliceright = nextpos; + return 1; + } + /* Not found */ + return 0; + + onError: + return -1; +} + +#ifdef HAVE_UNICODE +Py_ssize_t mxTextSearch_SearchUnicode(PyObject *self, + Py_UNICODE *text, + Py_ssize_t start, + Py_ssize_t stop, + Py_ssize_t *sliceleft, + Py_ssize_t *sliceright) +{ + Py_ssize_t nextpos; + Py_ssize_t match_len; + + Py_Assert(mxTextSearch_Check(self), + PyExc_TypeError, + "expected a TextSearch object"); + + switch (so->algorithm) { + + case MXTEXTSEARCH_BOYERMOORE: + Py_Error(PyExc_TypeError, + "Boyer-Moore search algorithm does not support Unicode"); + break; + + case MXTEXTSEARCH_TRIVIAL: + { + PyObject *u; + Py_UNICODE *match; + + if (PyUnicode_Check(so->match)) { + u = NULL; + match = PyUnicode_AS_UNICODE(so->match); + match_len = PyUnicode_GET_SIZE(so->match); + } + else { + u = PyUnicode_FromEncodedObject(so->match, NULL, NULL); + if (u == NULL) + goto onError; + match = PyUnicode_AS_UNICODE(u); + match_len = PyUnicode_GET_SIZE(u); + } + nextpos = trivial_unicode_search(text, + start, + stop, + match, + match_len); + Py_XDECREF(u); + } + break; + + default: + Py_Error(mxTextTools_Error, + "unknown algorithm type in mxTextSearch_SearchUnicode"); + + } + /* Found ? */ + if (nextpos != start) { + if (sliceleft) + *sliceleft = nextpos - match_len; + if (sliceright) + *sliceright = nextpos; + return 1; + } + /* Not found */ + return 0; + + onError: + return -1; +} +#endif + +/* methods */ + +Py_C_Function( mxTextSearch_search, + "TextSearch.search(text,start=0,stop=len(text))\n\n" + "Search for the substring in text, looking only at the\n" + "slice [start:stop] and return the slice (l,r)\n" + "where the substring was found, (start,start) otherwise.") +{ + PyObject *text; + Py_ssize_t start = 0; + Py_ssize_t stop = INT_MAX; + Py_ssize_t sliceleft, sliceright; + int rc; + + Py_Get3Args("O|ii:TextSearch.search", + text,start,stop); + + if (PyString_Check(text)) { + Py_CheckStringSlice(text, start, stop); + rc = mxTextSearch_SearchBuffer(self, + PyString_AS_STRING(text), + start, + stop, + &sliceleft, + &sliceright); + } +#ifdef HAVE_UNICODE + else if (PyUnicode_Check(text)) { + Py_CheckUnicodeSlice(text, start, stop); + rc = mxTextSearch_SearchUnicode(self, + PyUnicode_AS_UNICODE(text), + start, + stop, + &sliceleft, + &sliceright); + } +#endif + else + Py_Error(PyExc_TypeError, + "expected string or unicode"); + if (rc < 0) + goto onError; + if (rc == 0) { + sliceleft = start; + sliceright = start; + } + + /* Return the slice */ + Py_Return2("ii", sliceleft, sliceright); + + onError: + return NULL; +} + +Py_C_Function( mxTextSearch_find, + "TextSearch.find(text,start=0,stop=len(text))\n\n" + "Search for the substring in text, looking only at the\n" + "slice [start:stop] and return the index\n" + "where the substring was found, -1 otherwise.") +{ + PyObject *text; + Py_ssize_t start = 0; + Py_ssize_t stop = INT_MAX; + Py_ssize_t sliceleft, sliceright; + int rc; + + Py_Get3Args("O|ii:TextSearch.find", + text,start,stop); + + if (PyString_Check(text)) { + Py_CheckStringSlice(text, start, stop); + rc = mxTextSearch_SearchBuffer(self, + PyString_AS_STRING(text), + start, + stop, + &sliceleft, + &sliceright); + } +#ifdef HAVE_UNICODE + else if (PyUnicode_Check(text)) { + Py_CheckUnicodeSlice(text, start, stop); + rc = mxTextSearch_SearchUnicode(self, + PyUnicode_AS_UNICODE(text), + start, + stop, + &sliceleft, + &sliceright); + } +#endif + else + Py_Error(PyExc_TypeError, + 
"expected string or unicode"); + if (rc < 0) + goto onError; + if (rc == 0) + sliceleft = -1; + return PyInt_FromLong(sliceleft); + + onError: + return NULL; +} + +Py_C_Function( mxTextSearch_findall, + "TextSearch.findall(text,start=0,stop=len(text))\n\n" + "Search for the substring in text, looking only at the\n" + "slice [start:stop] and return a list of all\n" + "non overlapping slices (l,r) in text where the match\n" + "string can be found.") +{ + PyObject *text; + PyObject *list = 0; + Py_ssize_t start = 0; + Py_ssize_t stop = INT_MAX; + Py_ssize_t stop_index; + Py_ssize_t match_len; + Py_ssize_t listsize = INITIAL_LIST_SIZE; + Py_ssize_t listitem = 0; + + Py_Get3Args("O|ii:TextSearch.findall", + text,start,stop); + + if (PyString_Check(text)) { + Py_CheckStringSlice(text, start, stop); + } +#ifdef HAVE_UNICODE + else if (PyUnicode_Check(text)) { + Py_CheckUnicodeSlice(text, start, stop); + } +#endif + else + Py_Error(PyExc_TypeError, + "expected string or unicode"); + + list = PyList_New(listsize); + if (!list) + goto onError; + + match_len = mxTextSearch_MatchLength(self); + if (match_len < 0) + goto onError; + stop_index = stop - match_len; + + while (start <= stop_index) { + register PyObject *t,*v; + int rc; + Py_ssize_t sliceleft, sliceright; + + /* exact search */ + if (PyString_Check(text)) + rc = mxTextSearch_SearchBuffer(self, + PyString_AS_STRING(text), + start, + stop, + &sliceleft, + &sliceright); +#ifdef HAVE_UNICODE + else if (PyUnicode_Check(text)) + rc = mxTextSearch_SearchUnicode(self, + PyUnicode_AS_UNICODE(text), + start, + stop, + &sliceleft, + &sliceright); +#endif + else + break; + if (rc < 0) + goto onError; + if (rc == 0) + break; + + /* Build slice and append to list */ + t = PyTuple_New(2); + if (!t) + goto onError; + v = PyInt_FromLong(sliceleft); + if (!v) + goto onError; + PyTuple_SET_ITEM(t,0,v); + v = PyInt_FromLong(sliceright); + if (!v) + goto onError; + PyTuple_SET_ITEM(t,1,v); + + if (listitem < listsize) + PyList_SET_ITEM(list, listitem, t); + else { + PyList_Append(list, t); + Py_DECREF(t); + } + listitem++; + + start = sliceright; + } + + /* Resize list if necessary */ + if (listitem < listsize) + PyList_SetSlice(list, listitem, listsize, (PyObject*)NULL); + + return list; + + onError: + Py_XDECREF(list); + return NULL; +} + +#ifdef COPY_PROTOCOL +Py_C_Function( mxTextSearch_copy, + "copy([memo])\n\n" + "Return a new reference for the instance. This function\n" + "is used for the copy-protocol. 
Real copying doesn't take\n" + "place, since the instances are immutable.") +{ + PyObject *memo; + + Py_GetArg("|O",memo); + Py_INCREF(so); + return (PyObject *)so; + onError: + return NULL; +} +#endif + +#undef so + +/* --- slots --- */ + +static +PyObject *mxTextSearch_Repr(mxTextSearchObject *self) +{ + char *algoname; + PyObject *v; + char t[500], *reprstr; + + v = PyObject_Repr(self->match); + if (v == NULL) + return NULL; + reprstr = PyString_AsString(v); + if (reprstr == NULL) + return NULL; + + switch (self->algorithm) { + case MXTEXTSEARCH_BOYERMOORE: + algoname = "Boyer-Moore"; + break; + case MXTEXTSEARCH_TRIVIAL: + algoname = "Trivial"; + break; + default: + algoname = ""; + } + + sprintf(t, "<%.50s TextSearch object for %.400s at 0x%lx>", + algoname, reprstr, (long)self); + Py_DECREF(v); + return PyString_FromString(t); +} + +/* Python Method Table */ + +static +PyMethodDef mxTextSearch_Methods[] = +{ + Py_MethodListEntry("search",mxTextSearch_search), + Py_MethodListEntry("find",mxTextSearch_find), + Py_MethodListEntry("findall",mxTextSearch_findall), +#ifdef COPY_PROTOCOL + Py_MethodListEntry("__deepcopy__",mxTextSearch_copy), + Py_MethodListEntry("__copy__",mxTextSearch_copy), +#endif + {NULL,NULL} /* end of list */ +}; + +static PyMemberDef mxTextSearch_members[] = { + {"match",T_OBJECT_EX,offsetof(mxTextSearchObject,match),READONLY,"Text that this search matches"}, + {"translate",T_OBJECT,offsetof(mxTextSearchObject,translate),READONLY,"Translated search term"}, + {"algorithm",T_INT,offsetof(mxTextSearchObject,algorithm),READONLY,"Algorithm in use by the text search"}, + {NULL} +}; + +/* Python Type Table */ + +PyTypeObject mxTextSearch_Type = { + PyVarObject_HEAD_INIT(NULL, 0) /* init at startup ! */ + "TextSearch", /*tp_name*/ + sizeof(mxTextSearchObject), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + /* methods */ + (destructor)mxTextSearch_Free, /*tp_dealloc*/ + (printfunc)0, /*tp_print*/ + (getattrfunc)0, /*tp_getattr*/ + (setattrfunc)0, /*tp_setattr*/ + 0, /*tp_compare*/ + (reprfunc)mxTextSearch_Repr, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_number*/ + 0, /*tp_as_mapping*/ + (hashfunc)0, /*tp_hash*/ + (ternaryfunc)0, /*tp_call*/ + (reprfunc)0, /*tp_str*/ + (getattrofunc)0, /*tp_getattro*/ + (setattrofunc)0, /*tp_setattro*/ + 0, /*tp_asbuffer*/ + Py_TPFLAGS_DEFAULT, /*tp_flags*/ + "mxTextTools text-search object", /*tp_doc*/ + 0, /*tp_traverse*/ + 0, /*tp_clear*/ + 0, /*tp_richcompare*/ + 0, /*tp_weaklistoffset*/ + 0, /*tp_iter*/ + 0, /*tp_iternext*/ + mxTextSearch_Methods, /*tp_methods*/ + mxTextSearch_members, /*tp_members*/ +}; + +/* --- Character Set Object --------------------------------------------*/ + +/* internal */ + +/* 8-bit character sets are implemented using a simple 32-byte + long bitmap with one bit per character. 
+ + Addressing is done as follows: + + def char_is_set(ordinal): + return bitmap[ordinal >> 3] & (1 << (ordinal & 7)) + +*/ + +#define STRING_CHARSET_SIZE 256 +#define STRING_CHARSET_BITMAP_SIZE (STRING_CHARSET_SIZE / 8) + +typedef struct { + unsigned char bitmap[STRING_CHARSET_BITMAP_SIZE]; + /* character bitmap */ +} string_charset; + +static +int init_string_charset(mxCharSetObject *cs, + PyObject *definition) +{ + register Py_ssize_t i, j; + char *def = PyString_AS_STRING(definition); + const Py_ssize_t len = PyString_GET_SIZE(definition); + string_charset *lookup = 0; + register unsigned char *bitmap; + int logic = 1; + + /* Handle logic change (first char is '^' for negative matching) */ + if (len > 0 && def[0] == '^') { + logic = 0; + i = 1; + } + else + i = 0; + + /* Build 32-byte lookup bitmap (one bit per character) */ + lookup = (string_charset *)PyMem_Malloc(sizeof(string_charset)); + if (lookup == NULL) { + PyErr_NoMemory(); + goto onError; + } + memset(lookup, 0, sizeof(string_charset)); + cs->mode = MXCHARSET_8BITMODE; + cs->lookup = (void *)lookup; + bitmap = lookup->bitmap; + + for (; i < len; i++) { + + /* Handle escapes: "b\-d", "\\" */ + if (def[i] == '\\') { + if (i < len - 1 && def[i+1] == '\\') { + j = (unsigned char)'\\'; + bitmap[j >> 3] |= 1 << (j & 7); + i++; + } + continue; + } + + /* Handle ranges: "b-d", "\\-z", "\--z" */ + if (i < len - 2 && def[i+1] == '-') { + unsigned char range_left = def[i]; + unsigned char range_right = def[i+2]; + for (j = range_left; j <= range_right; j++) + bitmap[j >> 3] |= 1 << (j & 7); + i++; + continue; + } + + /* Normal processing */ + j = (unsigned char)def[i]; + bitmap[j >> 3] |= 1 << (j & 7); + } + + /* Invert bitmap if negative matching is requested */ + if (!logic) { + DPRINTF("init_string_charset: inverting bitmap\n"); + for (i = 0; i < STRING_CHARSET_BITMAP_SIZE; i++) + bitmap[i] ^= 0xFF; + } + + return 0; + + onError: + if (lookup) + PyMem_Free((void *)lookup); + cs->lookup = 0; + return -1; +} + +#ifdef HAVE_UNICODE + +/* Unicode character sets are implemented using two step indexing + which is a good compromise between lookup speed and memory usage. + + Lookup is done using a variable length array of 32-byte bitmap + blocks. There can be 256 such blocks. Identical blocks are + collapsed into a single copy. + + Addressing is done as follows: + + def char_is_set(ordinal): + index = bitmapindex[ordinal >> 8] + bitmap = bitmaps[index] + return bitmap[(ordinal >> 3) & 31] & (1 << (ordinal & 7)) + + The technique used here is very similar to what is done in Python's + SRE (see the BIGCHARSET patch by Martin von Loewis). Compression + should be reasonably good since character sets in practice usually + only contains a few single characters or longer ranges of Unicode + characters. 
+ +*/ + +#define UNICODE_CHARSET_SIZE 65536 +#define UNICODE_CHARSET_BITMAP_SIZE 32 +#define UNICODE_CHARSET_BITMAPS (UNICODE_CHARSET_SIZE / (UNICODE_CHARSET_BITMAP_SIZE * 8)) +#define UNICODE_CHARSET_BIGMAP_SIZE (UNICODE_CHARSET_SIZE / 8) + +typedef struct { + unsigned char bitmapindex[UNICODE_CHARSET_BITMAPS]; + /* Index to char bitmaps */ + unsigned char bitmaps[UNICODE_CHARSET_BITMAPS][UNICODE_CHARSET_BITMAP_SIZE]; + /* Variable length bitmap array */ +} unicode_charset; + +static +int init_unicode_charset(mxCharSetObject *cs, + PyObject *definition) +{ + register Py_ssize_t i, j; + Py_UNICODE *def = PyUnicode_AS_UNICODE(definition); + const Py_ssize_t len = PyUnicode_GET_SIZE(definition); + unicode_charset *lookup = 0; + unsigned char bigmap[UNICODE_CHARSET_BIGMAP_SIZE]; + Py_ssize_t blocks; + int logic = 1; + + /* Handle logic change (first char is '^' for negative matching) */ + if (len > 0 && def[0] == '^') { + logic = 0; + i = 1; + } + else + i = 0; + + /* Build bigmap */ + memset(bigmap, 0, sizeof(bigmap)); + for (; i < len; i++) { + + /* Handle escapes: "b\-d", "\\" */ + if (def[i] == '\\') { + if (i < len - 1 && def[i+1] == '\\') { + j = (int)'\\'; + bigmap[j >> 3] |= 1 << (j & 7); + i++; + } + continue; + } + + /* Handle ranges: "b-d", "\\-z", "\--z" */ + if (i < len - 2 && def[i+1] == '-') { + Py_UNICODE range_left = def[i]; + Py_UNICODE range_right = def[i+2]; + if (range_right >= UNICODE_CHARSET_SIZE) { + Py_Error(PyExc_ValueError, + "unicode ordinal out of supported range"); + } + for (j = range_left; j <= range_right; j++) + bigmap[j >> 3] |= 1 << (j & 7); + i++; + continue; + } + + /* Normal processing */ + j = def[i]; + if (j >= UNICODE_CHARSET_SIZE) { + Py_Error(PyExc_ValueError, + "unicode ordinal out of supported range"); + } + bigmap[j >> 3] |= 1 << (j & 7); + } + + /* Build lookup table + + XXX Could add dynamic resizing here... probably not worth it + though, since sizeof(unicode_charset) isn't all that large. 
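A small Python sketch of the block-collapsing step described above and
performed just below: split the 8192-byte bigmap into 256 blocks of 32
bytes and keep only one copy of each distinct block plus an index into
the collapsed set (illustrative, not the C code):

    def compress(bigmap):                    # bigmap: 8192-byte bytearray
        blocks, index = [], []
        for i in range(256):
            block = bytes(bigmap[i * 32:(i + 1) * 32])
            if block not in blocks:
                blocks.append(block)
            index.append(blocks.index(block))
        return index, blocks                 # bitmapindex plus bitmaps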
+ + */ + lookup = (unicode_charset *)PyMem_Malloc(sizeof(unicode_charset)); + if (lookup == NULL) { + PyErr_NoMemory(); + goto onError; + } + blocks = 0; + for (i = UNICODE_CHARSET_BITMAPS - 1; i >= 0; i--) { + unsigned char *block = &bigmap[i << 5]; + for (j = blocks - 1; j >= 0; j--) + if (memcmp(lookup->bitmaps[j], block, + UNICODE_CHARSET_BITMAP_SIZE) == 0) + break; + if (j < 0) { + j = blocks; + DPRINTF("init_unicode_charset: Creating new block %i for %i\n", + j, i); + memcpy(lookup->bitmaps[j], block, UNICODE_CHARSET_BITMAP_SIZE); + blocks++; + } + else + DPRINTF("init_unicode_charset: Reusing block %i for %i\n", j, i); + lookup->bitmapindex[i] = j; + } + DPRINTF("init_unicode_charset: Map size: %i block(s) = %i bytes\n", + blocks, UNICODE_CHARSET_BITMAPS + + blocks * UNICODE_CHARSET_BITMAP_SIZE); + lookup = (unicode_charset *)PyMem_Realloc(lookup, + UNICODE_CHARSET_BITMAPS + + blocks * UNICODE_CHARSET_BITMAP_SIZE); + if (lookup == NULL) { + PyErr_NoMemory(); + goto onError; + } + + /* Invert bitmaps if negative matching is requested */ + if (!logic) { + register unsigned char *bitmap = &lookup->bitmaps[0][0]; + DPRINTF("init_unicode_charset: inverting bitmaps\n"); + for (i = 0; i < blocks * UNICODE_CHARSET_BITMAP_SIZE; i++) + bitmap[i] ^= 0xFF; + } + + cs->mode = MXCHARSET_UCS2MODE; + cs->lookup = (void *)lookup; + return 0; + + onError: + if (lookup) + PyMem_Free((void *)lookup); + cs->lookup = 0; + return -1; +} + +#endif + +/* allocation */ + +static +PyObject *mxCharSet_New(PyObject *definition) +{ + mxCharSetObject *cs; + + cs = PyObject_NEW(mxCharSetObject, &mxCharSet_Type); + if (cs == NULL) + return NULL; + Py_INCREF(definition); + cs->definition = definition; + cs->lookup = NULL; + cs->mode = -1; + + if (PyString_Check(definition)) { + if (init_string_charset(cs, definition)) + goto onError; + } +#ifdef HAVE_UNICODE + else if (PyUnicode_Check(definition)) { + if (init_unicode_charset(cs, definition)) + goto onError; + } +#endif + else + Py_Error(PyExc_TypeError, + "character set definition must be string or unicode"); + + return (PyObject *)cs; + + onError: + Py_DECREF(cs); + return NULL; +} + +Py_C_Function( mxCharSet_CharSet, + "CharSet(definition)\n\n" + "Create a character set matching object from the string" + ) +{ + PyObject *definition; + + Py_GetArg("O:CharSet", definition); + return mxCharSet_New(definition); + + onError: + return NULL; +} + +static +void mxCharSet_Free(mxCharSetObject *cs) +{ + Py_XDECREF(cs->definition); + if (cs->lookup) + PyMem_Free(cs->lookup); + PyObject_Del(cs); +} + +/* C APIs */ + +#define cs ((mxCharSetObject *)self) + +int mxCharSet_ContainsChar(PyObject *self, + register unsigned char ch) +{ + if (!mxCharSet_Check(self)) { + PyErr_BadInternalCall(); + goto onError; + } + + if (cs->mode == MXCHARSET_8BITMODE) { + unsigned char *bitmap = ((string_charset *)cs->lookup)->bitmap; + return ((bitmap[ch >> 3] & (1 << (ch & 7))) != 0); + } +#ifdef HAVE_UNICODE + else if (cs->mode == MXCHARSET_UCS2MODE) { + unicode_charset *lookup = (unicode_charset *)cs->lookup; + unsigned char *bitmap = lookup->bitmaps[lookup->bitmapindex[0]]; + return ((bitmap[ch >> 3] & (1 << (ch & 7))) != 0); + } +#endif + else { + Py_Error(mxTextTools_Error, + "unsupported character set mode"); + } + + onError: + return -1; +} + +#ifdef HAVE_UNICODE + +int mxCharSet_ContainsUnicodeChar(PyObject *self, + register Py_UNICODE ch) +{ + if (!mxCharSet_Check(self)) { + PyErr_BadInternalCall(); + goto onError; + } + + if (cs->mode == MXCHARSET_8BITMODE) { + unsigned char *bitmap = 
((string_charset *)cs->lookup)->bitmap; + if (ch >= 256) + return 0; + return ((bitmap[ch >> 3] & (1 << (ch & 7))) != 0); + } + else if (cs->mode == MXCHARSET_UCS2MODE) { + unicode_charset *lookup = (unicode_charset *)cs->lookup; + unsigned char *bitmap = lookup->bitmaps[lookup->bitmapindex[ch >> 8]]; + return ((bitmap[(ch >> 3) & 31] & (1 << (ch & 7))) != 0); + } + else { + Py_Error(mxTextTools_Error, + "unsupported character set mode"); + } + + onError: + return -1; +} + +#endif + +static +int mxCharSet_Contains(PyObject *self, + PyObject *other) +{ + if (PyString_Check(other)) { + Py_Assert(PyString_GET_SIZE(other) == 1, + PyExc_TypeError, + "expected a single character"); + return mxCharSet_ContainsChar(self, PyString_AS_STRING(other)[0]); + } +#ifdef HAVE_UNICODE + else if (PyUnicode_Check(other)) { + Py_Assert(PyUnicode_GET_SIZE(other) == 1, + PyExc_TypeError, + "expected a single unicode character"); + return mxCharSet_ContainsUnicodeChar(self, + PyUnicode_AS_UNICODE(other)[0]); + } +#endif + else + Py_Error(PyExc_TypeError, + "expected string or unicode character"); + + onError: + return -1; +} + +/* In mode 1, find the position of the first character in text + belonging to set. This may also be stop or start-1 in case no such + character is found during the search (depending on the direction). + + In mode 0, find the first character not in set. This may also be + stop or start-1 in case no such character is found during the + search (depending on the direction). + + The search is done in the slice start:stop. + + -2 is returned in case of an error. + +*/ + +static +int mxCharSet_FindChar(PyObject *self, + unsigned char *text, + Py_ssize_t start, + Py_ssize_t stop, + const int mode, + const int direction) +{ + register Py_ssize_t i; + register unsigned int c; + register unsigned int block; + unsigned char *bitmap; + + if (!mxCharSet_Check(self)) { + PyErr_BadInternalCall(); + goto onError; + } + + if (cs->mode == MXCHARSET_8BITMODE) + bitmap = ((string_charset *)cs->lookup)->bitmap; +#ifdef HAVE_UNICODE + else if (cs->mode == MXCHARSET_UCS2MODE) { + unicode_charset *lookup = (unicode_charset *)cs->lookup; + bitmap = lookup->bitmaps[lookup->bitmapindex[0]]; + } +#endif + else { + Py_Error(mxTextTools_Error, + "unsupported character set mode"); + } + + if (direction > 0) { + if (mode) + /* Find first char in set */ + for (i = start; i < stop; i++) { + c = text[i]; + block = bitmap[c >> 3]; + if (block && ((block & (1 << (c & 7))) != 0)) + break; + } + else + /* Find first char not in set */ + for (i = start; i < stop; i++) { + c = text[i]; + block = bitmap[c >> 3]; + if (!block || ((block & (1 << (c & 7))) == 0)) + break; + } + } + else { + if (mode) + /* Find first char in set, searching from the end */ + for (i = stop - 1; i >= start; i--) { + c = text[i]; + block = bitmap[c >> 3]; + if (block && ((block & (1 << (c & 7))) != 0)) + break; + } + else + /* Find first char not in set, searching from the end */ + for (i = stop - 1; i >= start; i--) { + c = text[i]; + block = bitmap[c >> 3]; + if (!block || ((block & (1 << (c & 7))) == 0)) + break; + } + } + return i; + + onError: + return -2; +} + +#ifdef HAVE_UNICODE + +static +int mxCharSet_FindUnicodeChar(PyObject *self, + Py_UNICODE *text, + Py_ssize_t start, + Py_ssize_t stop, + const int mode, + const int direction) +{ + register int i; + register unsigned int c; + register unsigned int block; + unsigned char *bitmap; + + if (!mxCharSet_Check(self)) { + PyErr_BadInternalCall(); + goto onError; + } + + if (cs->mode == 
MXCHARSET_8BITMODE) { + bitmap = ((string_charset *)cs->lookup)->bitmap; + if (direction > 0) { + if (mode) + /* Find first char in set */ + for (i = start; i < stop; i++) { + c = text[i]; + if (c > 256) + continue; + block = bitmap[c >> 3]; + if (block && ((block & (1 << (c & 7))) != 0)) + break; + } + else + /* Find first char not in set */ + for (i = start; i < stop; i++) { + c = text[i]; + if (c > 256) + break; + block = bitmap[c >> 3]; + if (!block || ((block & (1 << (c & 7))) == 0)) + break; + } + } + else { + if (mode) + /* Find first char in set, searching from the end */ + for (i = stop - 1; i >= start; i--) { + c = text[i]; + if (c > 256) + continue; + block = bitmap[c >> 3]; + if (block && ((block & (1 << (c & 7))) != 0)) + break; + } + else + /* Find first char not in set, searching from the end */ + for (i = stop - 1; i >= start; i--) { + c = text[i]; + if (c > 256) + break; + block = bitmap[c >> 3]; + if (!block || ((block & (1 << (c & 7))) == 0)) + break; + } + } + return i; + } + +#ifdef HAVE_UNICODE + else if (cs->mode == MXCHARSET_UCS2MODE) { + unicode_charset *lookup = (unicode_charset *)cs->lookup; + if (direction > 0) { + if (mode) + /* Find first char in set */ + for (i = start; i < stop; i++) { + c = text[i]; + bitmap = lookup->bitmaps[lookup->bitmapindex[c >> 8]]; + block = bitmap[(c >> 3) & 31]; + if (block && ((block & (1 << (c & 7))) != 0)) + break; + } + else + /* Find first char not in set */ + for (i = start; i < stop; i++) { + c = text[i]; + bitmap = lookup->bitmaps[lookup->bitmapindex[c >> 8]]; + block = bitmap[(c >> 3) & 31]; + if (!block || ((block & (1 << (c & 7))) == 0)) + break; + } + } + else { + if (mode) + /* Find first char in set, searching from the end */ + for (i = stop - 1; i >= start; i--) { + c = text[i]; + bitmap = lookup->bitmaps[lookup->bitmapindex[c >> 8]]; + block = bitmap[(c >> 3) & 31]; + if (block && ((block & (1 << (c & 7))) != 0)) + break; + } + else + /* Find first char not in set, searching from the end */ + for (i = stop - 1; i >= start; i--) { + c = text[i]; + bitmap = lookup->bitmaps[lookup->bitmapindex[c >> 8]]; + block = bitmap[(c >> 3) & 31]; + if (!block || ((block & (1 << (c & 7))) == 0)) + break; + } + } + return i; + } +#endif + else { + Py_Error(mxTextTools_Error, + "unsupported character set mode"); + } + + onError: + return -2; +} + +#endif + +/* Return the position of the first character in text[start:stop] + occurring in set or -1 in case no such character exists. + +*/ + +static +int mxCharSet_Search(PyObject *self, + PyObject *text, + Py_ssize_t start, + Py_ssize_t stop, + int direction) +{ + Py_ssize_t position; + + if (PyString_Check(text)) { + Py_CheckStringSlice(text, start, stop); + position = mxCharSet_FindChar(self, + (unsigned char *)PyString_AS_STRING(text), + start, + stop, + 1, + direction); + } +#ifdef HAVE_UNICODE + else if (PyUnicode_Check(text)) { + Py_CheckUnicodeSlice(text, start, stop); + position = mxCharSet_FindUnicodeChar(self, + PyUnicode_AS_UNICODE(text), + start, + stop, + 1, + direction); + } +#endif + else + Py_Error(PyExc_TypeError, + "expected string or unicode"); + + if ((direction > 0 && position >= stop) || + (direction <= 0 && position < start)) + position = -1; + return position; + + onError: + return -2; +} + +/* Return the longest match of characters from set in + text[start:stop]. + + If direction is positive, the search is done from the left (longest + prefix), otherwise it is started from the right (longest suffix). + + -1 is returned in case of an error. 
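At the Python level these primitives surface as methods on the CharSet
object (wrappers defined further below); a hedged usage sketch, with the
import path taken from elsewhere in this package and result values as
implied by the semantics above:

    from simpleparse.stt.TextTools.TextTools import CharSet

    cs = CharSet('a-z0-9')        # single characters and ranges; a leading '^' negates
    cs.contains('q')              # -> 1
    cs.match('abc123 tail')       # -> 6, length of the longest matching prefix
    cs.search('   abc')           # -> 3, position of the first set character,
                                  #    or None when no such character exists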
+ +*/ + +Py_ssize_t mxCharSet_Match(PyObject *self, + PyObject *text, + Py_ssize_t start, + Py_ssize_t stop, + int direction) +{ + Py_ssize_t position; + + if (PyString_Check(text)) { + Py_CheckStringSlice(text, start, stop); + position = mxCharSet_FindChar(self, + (unsigned char *)PyString_AS_STRING(text), + start, + stop, + 0, + direction); + } +#ifdef HAVE_UNICODE + else if (PyUnicode_Check(text)) { + Py_CheckUnicodeSlice(text, start, stop); + position = mxCharSet_FindUnicodeChar(self, + PyUnicode_AS_UNICODE(text), + start, + stop, + 0, + direction); + } +#endif + else + Py_Error(PyExc_TypeError, + "expected string or unicode"); + + if (position < -1) + goto onError; + if (direction > 0) + return position - start; + else + return stop-1 - position; + + onError: + return -1; +} + +/* Stips off characters appearing in the character set from text[start:stop] + and returns the result as Python string object. + + where indicates the mode: + where < 0: strip left only + where = 0: strip left and right + where > 0: strip right only + +*/ +static +PyObject *mxCharSet_Strip(PyObject *self, + PyObject *text, + Py_ssize_t start, + Py_ssize_t stop, + Py_ssize_t where) +{ + Py_ssize_t left,right; + + if (!mxCharSet_Check(self)) { + PyErr_BadInternalCall(); + goto onError; + } + + if (PyString_Check(text)) { + Py_CheckStringSlice(text, start, stop); + + /* Strip left */ + if (where <= 0) { + left = mxCharSet_FindChar(self, + (unsigned char *)PyString_AS_STRING(text), + start, + stop, + 0, + 1); + if (left < 0) + goto onError; + } + else + left = start; + + /* Strip right */ + if (where >= 0) { + right = mxCharSet_FindChar(self, + (unsigned char *)PyString_AS_STRING(text), + left, + stop, + 0, + -1) + 1; + if (right < 0) + goto onError; + } + else + right = stop; + + return PyString_FromStringAndSize(PyString_AS_STRING(text) + left, + max(right - left, 0)); + } +#ifdef HAVE_UNICODE + else if (PyUnicode_Check(text)) { + Py_CheckUnicodeSlice(text, start, stop); + + /* Strip left */ + if (where <= 0) { + left = mxCharSet_FindUnicodeChar(self, + PyUnicode_AS_UNICODE(text), + start, + stop, + 0, + 1); + if (left < 0) + goto onError; + } + else + left = start; + + /* Strip right */ + if (where >= 0) { + right = mxCharSet_FindUnicodeChar(self, + PyUnicode_AS_UNICODE(text), + start, + stop, + 0, + -1) + 1; + if (right < 0) + goto onError; + } + else + right = stop; + + return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(text) + left, + max(right - left, 0)); + } +#endif + else + Py_Error(PyExc_TypeError, + "expected string or unicode"); + + onError: + return NULL; +} + +static +PyObject *mxCharSet_Split(PyObject *self, + PyObject *text, + Py_ssize_t start, + Py_ssize_t text_len, + int include_splits) +{ + PyObject *list = NULL; + PyObject *s; + register Py_ssize_t x; + Py_ssize_t listitem = 0; + Py_ssize_t listsize = INITIAL_LIST_SIZE; + + if (!mxCharSet_Check(self)) { + PyErr_BadInternalCall(); + goto onError; + } + + list = PyList_New(listsize); + if (!list) + goto onError; + + if (PyString_Check(text)) { + unsigned char *tx = (unsigned char *)PyString_AS_STRING(text); + + Py_CheckStringSlice(text, start, text_len); + + x = start; + while (x < text_len) { + Py_ssize_t z; + + /* Skip all text in set (include_splits == 0), not in set + (include_splits == 1) */ + z = x; + x = mxCharSet_FindChar(self, tx, x, text_len, include_splits, 1); + + /* Append the slice to list */ + if (include_splits) { + s = PyString_FromStringAndSize((char *)&tx[z], x - z); + if (!s) + goto onError; + if (listitem < listsize) + 
PyList_SET_ITEM(list,listitem,s); + else { + PyList_Append(list,s); + Py_DECREF(s); + } + listitem++; + + if (x >= text_len) + break; + } + + /* Skip all text in set (include_splits == 1), not in set + (include_splits == 0) */ + z = x; + x = mxCharSet_FindChar(self, tx, x, text_len, !include_splits, 1); + + /* Append the slice to list if it is not empty */ + if (x > z) { + s = PyString_FromStringAndSize((char *)&tx[z], x - z); + if (!s) + goto onError; + if (listitem < listsize) + PyList_SET_ITEM(list,listitem,s); + else { + PyList_Append(list,s); + Py_DECREF(s); + } + listitem++; + } + } + + } +#ifdef HAVE_UNICODE + else if (PyUnicode_Check(text)) { + Py_UNICODE *tx = PyUnicode_AS_UNICODE(text); + + Py_CheckUnicodeSlice(text, start, text_len); + + x = start; + while (x < text_len) { + Py_ssize_t z; + + /* Skip all text in set (include_splits == 0), not in set + (include_splits == 1) */ + z = x; + x = mxCharSet_FindUnicodeChar(self, tx, x, text_len, include_splits, 1); + + /* Append the slice to list */ + if (include_splits) { + s = PyUnicode_FromUnicode(&tx[z], x - z); + if (!s) + goto onError; + if (listitem < listsize) + PyList_SET_ITEM(list,listitem,s); + else { + PyList_Append(list,s); + Py_DECREF(s); + } + listitem++; + + if (x >= text_len) + break; + } + + /* Skip all text in set (include_splits == 1), not in set + (include_splits == 0) */ + z = x; + x = mxCharSet_FindUnicodeChar(self, tx, x, text_len, !include_splits, 1); + + /* Append the slice to list if it is not empty */ + if (x > z) { + s = PyUnicode_FromUnicode(&tx[z], x - z); + if (!s) + goto onError; + if (listitem < listsize) + PyList_SET_ITEM(list,listitem,s); + else { + PyList_Append(list,s); + Py_DECREF(s); + } + listitem++; + } + } + } +#endif + else + Py_Error(PyExc_TypeError, + "expected string or unicode"); + + /* Resize list if necessary */ + if (listitem < listsize) + PyList_SetSlice(list, listitem, listsize, (PyObject*)NULL); + + return list; + + onError: + Py_XDECREF(list); + return NULL; +} + +/* methods */ + +Py_C_Function( mxCharSet_contains, + ".contains(char)\n\n" + ) +{ + PyObject *chr; + int rc; + + Py_GetArg("O:CharSet.contains", chr); + + rc = mxCharSet_Contains(self, chr); + if (rc < 0) + goto onError; + return PyInt_FromLong(rc); + + onError: + return NULL; +} + +Py_C_Function( mxCharSet_search, + ".search(text[, direction=1, start=0, stop=len(text)])\n\n" + ) +{ + PyObject *text; + int direction = 1; + Py_ssize_t start = 0, stop = INT_MAX; + int rc; + + Py_Get4Args("O|iii:CharSet.search", text, direction, start, stop); + + rc = mxCharSet_Search(self, text, start, stop, direction); + if (rc == -1) + Py_ReturnNone(); + if (rc < -1) + goto onError; + return PyInt_FromLong(rc); + + onError: + return NULL; +} + +Py_C_Function( mxCharSet_match, + ".match(text[, direction=1, start=0, stop=len(text)])\n\n" + ) +{ + PyObject *text; + int direction = 1; + Py_ssize_t start = 0, stop = INT_MAX; + int rc; + + Py_Get4Args("O|iii:CharSet.match", text, direction, start, stop); + + rc = mxCharSet_Match(self, text, start, stop, direction); + if (rc < 0) + goto onError; + return PyInt_FromLong(rc); + + onError: + return NULL; +} + +Py_C_Function( mxCharSet_split, + ".split(text[, start=0, stop=len(text)])\n\n" + ) +{ + PyObject *text; + Py_ssize_t start = 0, stop = INT_MAX; + + Py_Get3Args("O|ii:CharSet.split", text, start, stop); + + return mxCharSet_Split(self, text, start, stop, 0); + + onError: + return NULL; +} + +Py_C_Function( mxCharSet_splitx, + ".splitx(text[, start=0, stop=len(text)])\n\n" + ) +{ + PyObject 
*text; + Py_ssize_t start = 0, stop = INT_MAX; + + Py_Get3Args("O|ii:CharSet.splitx", text, start, stop); + + return mxCharSet_Split(self, text, start, stop, 1); + + onError: + return NULL; +} + +Py_C_Function( mxCharSet_strip, + ".strip(text[, where=0, start=0, stop=len(text)])\n\n" + ) +{ + PyObject *text; + Py_ssize_t where = 0; + Py_ssize_t start = 0, stop = INT_MAX; + + Py_Get4Args("O|iii:CharSet.strip", text, where, start, stop); + + return mxCharSet_Strip(self, text, start, stop, where); + + onError: + return NULL; +} + +#ifdef COPY_PROTOCOL +Py_C_Function( mxCharSet_copy, + "copy([memo])\n\n" + "Return a new reference for the instance. This function\n" + "is used for the copy-protocol. Real copying doesn't take\n" + "place, since the instances are immutable.") +{ + PyObject *memo; + + Py_GetArg("|O",memo); + Py_INCREF(cs); + return (PyObject *)cs; + onError: + return NULL; +} +#endif + +#undef cs + +/* --- slots --- */ + +static +PyObject *mxCharSet_Repr(mxCharSetObject *self) +{ + PyObject *v; + char t[500], *reprstr; + + v = PyObject_Repr(self->definition); + if (v == NULL) + return NULL; + reprstr = PyString_AsString(v); + if (reprstr == NULL) + return NULL; + sprintf(t, "", + reprstr, (long)self); + Py_DECREF(v); + return PyString_FromString(t); +} + +/* Python Type Tables */ + +static +PySequenceMethods mxCharSet_TypeAsSequence = { + (lenfunc)0, /*sq_length*/ + (binaryfunc)0, /*sq_concat*/ + (ssizeargfunc)0, /*sq_repeat*/ + (ssizeargfunc)0, /*sq_item*/ + (ssizessizeargfunc)0, /*sq_slice*/ + (ssizeobjargproc)0, /*sq_ass_item*/ + (ssizessizeobjargproc)0, /*sq_ass_slice*/ + (objobjproc)mxCharSet_Contains, /*sq_contains*/ +}; + +static +PyMemberDef mxCharSet_Members[] = { + {"definition",T_OBJECT_EX,offsetof(mxCharSetObject,definition),READONLY,"Definition"}, + {NULL} +}; + +static +PyMethodDef mxCharSet_Methods[] = +{ + Py_MethodListEntry("contains",mxCharSet_contains), + Py_MethodListEntry("search",mxCharSet_search), + Py_MethodListEntry("match",mxCharSet_match), + Py_MethodListEntry("strip",mxCharSet_strip), + Py_MethodListEntry("split",mxCharSet_split), + Py_MethodListEntry("splitx",mxCharSet_splitx), +#ifdef COPY_PROTOCOL + Py_MethodListEntry("__deepcopy__",mxCharSet_copy), + Py_MethodListEntry("__copy__",mxCharSet_copy), +#endif + {NULL,NULL} /* end of list */ +}; + +PyTypeObject mxCharSet_Type = { + PyVarObject_HEAD_INIT(NULL, 0) /* init at startup ! 
*/ + "Character Set", /* tp_name */ + sizeof(mxCharSetObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)mxCharSet_Free, /* tp_dealloc */ + (printfunc)0, /* tp_print */ + (getattrfunc)0, /* tp_getattr */ + (setattrfunc)0, /* tp_setattr */ + 0, /* tp_compare */ + (reprfunc)mxCharSet_Repr, /* tp_repr */ + 0, /* tp_as_number */ + &mxCharSet_TypeAsSequence, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + (hashfunc)0, /* tp_hash */ + (ternaryfunc)0, /* tp_call */ + (reprfunc)0, /* tp_str */ + (getattrofunc)0, /* tp_getattro */ + (setattrofunc)0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + (char*) 0, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + mxCharSet_Methods, /* tp_methods */ + mxCharSet_Members, /* tp_members */ +}; + +/* --- Tag Table Object ------------------------------------------------*/ + +PyObject *mxTagTable_New(PyObject *definition, + int tabletype, + int cacheable); + +/* internal APIs */ + +static +PyObject *tc_get_item(register PyObject *obj, + register Py_ssize_t i) +{ + if (PyTuple_Check(obj)) { + if (i > PyTuple_GET_SIZE(obj)) + return NULL; + return PyTuple_GET_ITEM(obj, i); + } + else if (PyList_Check(obj)) { + if (i > PyList_GET_SIZE(obj)) + return NULL; + return PyList_GET_ITEM(obj, i); + } + else + return NULL; +} + +static +Py_ssize_t tc_length(register PyObject *obj) +{ + if (obj == NULL) + return -1; + else if (PyTuple_Check(obj)) + return PyTuple_GET_SIZE(obj); + else if (PyList_Check(obj)) + return PyList_GET_SIZE(obj); + else + return -1; +} + +/* Add a jump target to the jump dictionary */ + +static +Py_ssize_t tc_add_jumptarget(PyObject *jumpdict, + PyObject *targetname, + Py_ssize_t index) +{ + PyObject *v; + + v = PyDict_GetItem(jumpdict, targetname); + if (v != NULL) + Py_ErrorWithArg(PyExc_TypeError, + "tag table entry %d: " + "jump target already defined", (unsigned int) index); + v = PyInt_FromLong(index); + if (v == NULL) + goto onError; + if (PyDict_SetItem(jumpdict, targetname, v)) + goto onError; + Py_DECREF(v); + return 0; + + onError: + return -1; +} + +/* Convert a string command argument to either an 8-bit string or + Unicode depending on the tabletype. 
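   Roughly, from the Python side (a sketch; AllIn and the TagTable /
   UnicodeTagTable factories are assumed to carry their usual exported
   names, which are not defined in this hunk):

       entry = (None, AllIn, 'abc')
       TagTable((entry,))          # keeps 'abc' as an 8-bit string argument
       UnicodeTagTable((entry,))   # decodes 'abc' to u'abc' with the default codec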
*/ + +static +PyObject *tc_convert_string_arg(PyObject *arg, + Py_ssize_t tableposition, + int tabletype) +{ + /* Convert to strings */ + if (tabletype == MXTAGTABLE_STRINGTYPE) { + if (PyString_Check(arg)) + return arg; +#ifdef HAVE_UNICODE + else if (PyUnicode_Check(arg)) { + Py_DECREF(arg); + arg = PyUnicode_AsEncodedString(arg, + NULL, + NULL); + if (arg == NULL) + Py_ErrorWithArg(PyExc_TypeError, + "tag table entry %d: " + "conversion from Unicode to " + "string failed", (unsigned int)tableposition); + } +#endif + else + Py_ErrorWithArg(PyExc_TypeError, + "tag table entry %d: " + "command argument must be a " + "string or unicode", (unsigned int)tableposition); + } + +#ifdef HAVE_UNICODE + /* Convert to Unicode */ + else if (tabletype == MXTAGTABLE_UNICODETYPE) { + if (PyUnicode_Check(arg)) + return arg; + else if (PyString_Check(arg)) { + Py_DECREF(arg); + arg = PyUnicode_Decode(PyString_AS_STRING(arg), + PyString_GET_SIZE(arg), + NULL, + NULL); + if (arg == NULL) + Py_ErrorWithArg(PyExc_TypeError, + "tag table entry %d: " + "conversion from string to " + "Unicode failed", (unsigned int)tableposition); + } + else + Py_ErrorWithArg(PyExc_TypeError, + "tag table entry %d: " + "command argument must be a " + "string or unicode", (unsigned int)tableposition); + } +#endif + + else + Py_Error(mxTextTools_Error, + "unsupported table type"); + + return arg; + + onError: + return NULL; +} + +/* Cleanup any references in the tag table. */ + +static +int tc_cleanup(mxTagTableObject *tagtable) +{ + Py_ssize_t i; + for (i = 0; i < tagtable->numentries; i++) { + mxTagTableEntry *tagtableentry = &tagtable->entry[i]; + + Py_XDECREF(tagtableentry->tagobj); + tagtableentry->tagobj = NULL; + Py_XDECREF(tagtableentry->args); + tagtableentry->args = NULL; + } + return 0; +} + +/* Initialize the tag table (this is the actual Tag Table compiler) */ + +static +int init_tag_table(mxTagTableObject *tagtable, + PyObject *table, + Py_ssize_t size, + int tabletype, + int cacheable) +{ + Py_ssize_t i; + PyObject *entry; + Py_ssize_t entry_len; + PyObject *tagobj, *command, *args = 0, *je, *jne; + PyObject *jumpdict, *v; + int secondpass, own_args = 0; + + jumpdict = PyDict_New(); + if (jumpdict == NULL) + return -1; + + /* Reset to all fields to 0 */ + memset(&tagtable->entry[0], 0, size * sizeof(mxTagTableEntry)); + + /* First pass */ + secondpass = 0; + tagtable->numentries = size; + for (i = 0; i < size; i++) { + mxTagTableEntry *tagtableentry = &tagtable->entry[i]; + + /* Get table entry i and parse it */ + entry = tc_get_item(table, i); + if (entry == NULL) { + Py_ErrorWithArg(PyExc_TypeError, + "tag table entry %d: " + "not found or not a supported entry type", (unsigned int)i); + } + + /* Special handling for jump marks (args is set to the jump + mark string, jump target index is the next table entry) */ + if (PyString_Check(entry)) { + if (tc_add_jumptarget(jumpdict, entry, i + 1)) + goto onError; + tagtableentry->tagobj = NULL; + tagtableentry->cmd = MATCH_JUMPTARGET; + tagtableentry->flags = 0; + Py_INCREF(entry); + tagtableentry->args = entry; + tagtableentry->jne = 0; + tagtableentry->je = 1; + continue; + } + + /* Get entry length */ + entry_len = tc_length(entry); + if (entry_len < 3) { + Py_ErrorWithArg(PyExc_TypeError, + "tag table entry %d: " + "expected an entry of the form " + "(tagobj,command,arg[,jne[,je]])", (unsigned int)i); + } + + /* Decode entry parts: (tagobj, command, args[, jne[, je]]) */ + tagobj = tc_get_item(entry, 0); + command = tc_get_item(entry, 1); + args = tc_get_item(entry, 
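/* For reference, the tuple layout being decoded here, written as it
   would appear in a Python table definition (a sketch; the command
   constant AllIn is an assumed name, not defined in this hunk):

       table = (
           ('word',                       # tagobj (None suppresses the tag)
            AllIn,                        # command integer, possibly OR-ed with flags
            'abcdefghijklmnopqrstuvwxyz', # command argument
            +1,                           # jne: relative jump taken on failure
            +1),                          # je:  relative jump taken on success
       )

   A bare string entry instead defines a jump mark whose target is the
   entry that follows it. */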
2); + if (entry_len >= 4) + jne = tc_get_item(entry, 3); + else + jne = NULL; + if (entry_len >= 5) + je = tc_get_item(entry, 4); + else + je = NULL; + + if (tagobj == NULL || + command == NULL || + args == NULL || + (entry_len >= 4 && jne == NULL) || + (entry_len >= 5 && je == NULL)) { + Py_ErrorWithArg(PyExc_TypeError, + "tag table entry %d: " + "expected an entry of the form " + "(tagobj,command,arg[,jne[,je]])",(unsigned int) i); + } + + /* Store tagobj, None gets converted to NULL */ + if (tagobj != Py_None) + Py_INCREF(tagobj); + else + tagobj = NULL; + tagtableentry->tagobj = tagobj; + + /* Decode command and flags */ + Py_AssertWithArg(PyInt_Check(command), + PyExc_TypeError, + "tag table entry %d: " + "command must be an integer",(unsigned int)i); + tagtableentry->cmd = PyInt_AS_LONG(command) & 0xFF; + tagtableentry->flags = PyInt_AS_LONG(command) - tagtableentry->cmd; + + /* Check command arguments */ + Py_INCREF(args); + own_args = 1; + + switch (tagtableentry->cmd) { + + case MATCH_JUMP: /* == MATCH_FAIL */ + case MATCH_EOF: + case MATCH_LOOP: + /* args is ignored */ + break; + + case MATCH_SKIP: + case MATCH_MOVE: + case MATCH_LOOPCONTROL: + Py_AssertWithArg(PyInt_Check(args), + PyExc_TypeError, + "tag table entry %d: " + "Skip|Move|LoopControl command argument " + "must be an integer", (unsigned int)i); + break; + + case MATCH_JUMPTARGET: + Py_AssertWithArg(PyString_Check(args), + PyExc_TypeError, + "tag table entry %d: " + "JumpMark command argument must be a string",(unsigned int)i); + if (tc_add_jumptarget(jumpdict, args, i + 1)) + goto onError; + break; + + case MATCH_ALLIN: + case MATCH_ALLNOTIN: + case MATCH_IS: + case MATCH_ISIN: + case MATCH_ISNOTIN: + case MATCH_WORD: + case MATCH_WORDSTART: + case MATCH_WORDEND: + args = tc_convert_string_arg(args, i, tabletype); + if (args == NULL) + goto onError; + break; + + case MATCH_ALLINSET: + case MATCH_ISINSET: + Py_AssertWithArg(PyString_Check(args) && + PyString_GET_SIZE(args) == 32, + PyExc_TypeError, + "tag table entry %d: " + "AllInSet|IsInSet command argument must " + "be a set() string",(unsigned int)i); + break; + + case MATCH_ALLINCHARSET: + case MATCH_ISINCHARSET: + Py_AssertWithArg(mxCharSet_Check(args), + PyExc_TypeError, + "tag table entry %d: " + "AllInCharSet|IsInCharSet command argument must " + "be a CharSet instance",(unsigned int)i); + break; + + case MATCH_SWORDSTART: /* == MATCH_NOWORD */ + case MATCH_SWORDEND: + case MATCH_SFINDWORD: + Py_AssertWithArg(mxTextSearch_Check(args), + PyExc_TypeError, + "tag table entry %d: " + "sWordStart|sWordEnd|sFindWord command " + "argument must be a TextSearch search " + "object",(unsigned int)i); + break; + + case MATCH_TABLE: + case MATCH_SUBTABLE: + Py_AssertWithArg(mxTagTable_Check(args) || + PyTuple_Check(args) || + PyList_Check(args) || + (PyInt_Check(args) && + PyInt_AS_LONG(args) == MATCH_THISTABLE), + PyExc_TypeError, + "tag table entry %d: " + "Table|SubTable command argument " + "must be a tag table tuple/object or " + "ThisTable", (unsigned int)i); + /* XXX We shouldn't recursively compile tag table tuples here + because this will slow down the compile process + too much and it's not clear whether this particular + table will ever be used during tagging. 
+ */ + if (!mxTagTable_Check(args) && !PyInt_Check(args)) { + Py_DECREF(args); + args = mxTagTable_New(args, tabletype, cacheable); + if (args == NULL) + goto onError; + } + break; + + case MATCH_TABLEINLIST: + case MATCH_SUBTABLEINLIST: + Py_AssertWithArg(PyTuple_Check(args) && + PyTuple_GET_SIZE(args) == 2 && + PyList_Check(PyTuple_GET_ITEM(args, 0)) && + PyInt_Check(PyTuple_GET_ITEM(args, 1)), + PyExc_TypeError, + "tag table entry %d: " + "TableInList|SubTableInList command argument " + "must be a 2-tuple (list, integer)", + (unsigned int)i); + break; + + case MATCH_CALL: + Py_AssertWithArg(PyCallable_Check(args), + PyExc_TypeError, + "tag table entry %d: " + "Call command argument " + "must be a callable object", + (unsigned int)i); + break; + + case MATCH_CALLARG: + Py_AssertWithArg(PyTuple_Check(args) && + PyTuple_GET_SIZE(args) > 0 && + PyCallable_Check(PyTuple_GET_ITEM(args, 0)), + PyExc_TypeError, + "tag table entry %d: " + "CallArg command argument " + "must be a tuple (fct,[arg0,arg1,...])", + (unsigned int)i); + break; + + default: + Py_ErrorWith2Args(PyExc_TypeError, + "tag table entry %d: " + "unknown command integer: %i", + (unsigned int)i, tagtableentry->cmd); + + } + + /* Store command args */ + tagtableentry->args = args; + own_args = 0; + + /* Decode jump offsets */ + if (jne) { + if (PyInt_Check(jne)) + tagtableentry->jne = PyInt_AS_LONG(jne); + else if (PyString_Check(jne)) { + /* Mark for back-patching */ + tagtableentry->jne = -424242; + secondpass = 1; + } + else + Py_ErrorWithArg(PyExc_TypeError, + "tag table entry %d: " + "jne must be an integer or string", (unsigned int)i); + } + else + tagtableentry->jne = 0; + + if (je) { + if (PyInt_Check(je)) + tagtableentry->je = PyInt_AS_LONG(je); + else if (PyString_Check(je)) { + /* Mark for back-patching */ + tagtableentry->je = -424242; + secondpass = 1; + } + else + Py_ErrorWithArg(PyExc_TypeError, + "tag table entry %d: " + "je must be an integer or string", (unsigned int)i); + } + else + tagtableentry->je = 1; + } + + /* Second pass (needed to patch string jump targets) */ + if (secondpass) + for (i = 0; i < size; i++) { + mxTagTableEntry *tagtableentry = &tagtable->entry[i]; + + if (tagtableentry->je != -424242 && + tagtableentry->jne != -424242) + continue; + + /* Entry (most probably) needs back-patching */ + entry = tc_get_item(table, i); + if (entry == NULL) { + Py_ErrorWithArg(PyExc_TypeError, + "tag table entry %d: " + "unexpected error (not found)", (unsigned int)i); + } + + /* Get entry length */ + entry_len = tc_length(entry); + if (entry_len < 0) { + Py_ErrorWithArg(PyExc_TypeError, + "tag table entry %d: " + "unexpected error (no length)", (unsigned int)i); + } + + /* Decode jump offsets */ + if (entry_len >= 4) + jne = tc_get_item(entry, 3); + else + jne = NULL; + if (entry_len >= 5) + je = tc_get_item(entry, 4); + else + je = NULL; + + /* Patch jump offsets */ + if (jne && PyString_Check(jne)) { + v = PyDict_GetItem(jumpdict, jne); + if (v == NULL || !PyInt_Check(v)) + Py_ErrorWith2Args(PyExc_TypeError, + "tag table entry %d: " + "jne jump target '%s' not found", + (unsigned int)i, PyString_AS_STRING(jne)); + tagtableentry->jne = PyInt_AS_LONG(v) - i; + } + if (je && PyString_Check(je)) { + v = PyDict_GetItem(jumpdict, je); + if (v == NULL || !PyInt_Check(v)) + Py_ErrorWith2Args(PyExc_TypeError, + "tag table entry %d: " + "je jump target '%s' not found", + (unsigned int)i, PyString_AS_STRING(je)); + tagtableentry->je = PyInt_AS_LONG(v) - i; + } + } + + Py_DECREF(jumpdict); + return 0; + + onError: + if 
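/* The second pass above exists so that jne/je may be written as strings
   naming a jump mark rather than as numeric offsets, e.g. (a sketch;
   Is, Skip and AllIn are assumed constant names):

       table = (
           (None, Is, '-', 'unsigned'),       # jne given as a mark name
           ('sign', Skip, 0),
           'unsigned',                        # bare string = jump mark
           ('digits', AllIn, '0123456789'),
       )

   During the second pass each string target is replaced by a relative
   offset looked up in the jump dictionary built in the first pass. */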
(own_args) { + Py_DECREF(args); + } + return -1; +} + +/* Check the cache for an already compiled TagTable for this + definition. Return NULL in case of an error, Py_None without + INCREF in case no such table was found or the TagTable object. */ + +static +PyObject *consult_tagtable_cache(PyObject *definition, + int tabletype, + int cacheable) +{ + PyObject *v, *key, *tt; + + if (!PyTuple_Check(definition) || !cacheable) + return Py_None; + + key = PyTuple_New(2); + if (key == NULL) + goto onError; + v = PyInt_FromLong((long) definition); + if (v == NULL) + goto onError; + PyTuple_SET_ITEM(key, 0, v); + v = PyInt_FromLong(tabletype); + if (v == NULL) + goto onError; + PyTuple_SET_ITEM(key, 1, v); + tt = PyDict_GetItem(mxTextTools_TagTables, key); + Py_DECREF(key); + if (tt != NULL) { + Py_INCREF(tt); + return tt; + } + return Py_None; + + onError: + return NULL; +} + +/* Adds the compiled tagtable to the cache. Returns -1 in case of an + error, 0 on success. */ + +static +int add_to_tagtable_cache(PyObject *definition, + int tabletype, + int cacheable, + PyObject *tagtable) +{ + PyObject *v, *key; + int rc; + + if (!PyTuple_Check(definition) || !cacheable) + return 0; + + key = PyTuple_New(2); + if (key == NULL) + goto onError; + v = PyInt_FromLong((long) definition); + if (v == NULL) + goto onError; + PyTuple_SET_ITEM(key, 0, v); + v = PyInt_FromLong(tabletype); + if (v == NULL) + goto onError; + PyTuple_SET_ITEM(key, 1, v); + + /* Hard-limit the cache size */ + if (PyDict_Size(mxTextTools_TagTables) >= MAX_TAGTABLES_CACHE_SIZE) + PyDict_Clear(mxTextTools_TagTables); + + rc = PyDict_SetItem(mxTextTools_TagTables, key, tagtable); + Py_DECREF(key); + if (rc) + goto onError; + return 0; + + onError: + return -1; +} + + +/* allocation */ + +PyObject *mxTagTable_New(PyObject *definition, + int tabletype, + int cacheable) +{ + mxTagTableObject *tagtable = 0; + PyObject *v; + Py_ssize_t size; + + /* First, consult the TagTable cache */ + v = consult_tagtable_cache(definition, tabletype, cacheable); + if (v == NULL) + goto onError; + else if (v != Py_None) + return v; + + size = tc_length(definition); + if (size < 0) + Py_Error(PyExc_TypeError, + "tag table definition must be a tuple or a list"); + + tagtable = PyObject_NEW_VAR(mxTagTableObject, &mxTagTable_Type, size); + if (tagtable == NULL) + goto onError; + if (cacheable) { + Py_INCREF(definition); + tagtable->definition = definition; + } + else + tagtable->definition = NULL; + tagtable->tabletype = tabletype; + + /* Compile table ... 
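   A note on the cache consulted above: only tuple definitions with
   cacheable true are looked up, the key is (id(definition), tabletype),
   and the whole cache is flushed once it reaches MAX_TAGTABLES_CACHE_SIZE
   entries.  From Python this means, roughly (a sketch; AllIn is an
   assumed constant name):

       t = ((None, AllIn, 'abc'),)
       TagTable(t)                 # compiles and caches (t is a tuple)
       TagTable(t)                 # cache hit: same object while t stays alive
       TagTable(list(t))           # lists are never cached
       TagTable(t, 0)              # cachable=0 bypasses the cache entirely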
*/ + if (init_tag_table(tagtable, definition, size, tabletype, cacheable)) + goto onError; + + /* Cache the compiled table if it is cacheable and derived from a + tuple */ + if (add_to_tagtable_cache(definition, tabletype, cacheable, + (PyObject *)tagtable)) + goto onError; + + return (PyObject *)tagtable; + + onError: + Py_XDECREF(tagtable); + return NULL; +} + +Py_C_Function( mxTagTable_TagTable, + "TagTable(definition[,cachable=1])\n\n" + ) +{ + PyObject *definition; + int cacheable = 1; + + Py_Get2Args("O|i:TagTable", definition, cacheable); + return mxTagTable_New(definition, 0, cacheable); + + onError: + return NULL; +} + +#ifdef HAVE_UNICODE +Py_C_Function( mxTagTable_UnicodeTagTable, + "TagTable(definition[,cachable=1])\n\n" + ) +{ + PyObject *definition; + int cacheable = 1; + + Py_Get2Args("O|i:UnicodeTagTable", definition, cacheable); + return mxTagTable_New(definition, 1, cacheable); + + onError: + return NULL; +} +#endif + +static +void mxTagTable_Free(mxTagTableObject *tagtable) +{ + tc_cleanup(tagtable); + Py_XDECREF(tagtable->definition); + PyObject_Del(tagtable); +} + +/* C APIs */ + +#define tagtable ((mxTagTableObject *)self) + +static +PyObject *mxTagTable_CompiledDefinition(PyObject *self) +{ + PyObject *tuple = 0, *v, *w; + Py_ssize_t i; + Py_ssize_t size; + + if (!mxTagTable_Check(self)) { + PyErr_BadInternalCall(); + goto onError; + } + + size = tagtable->numentries; + tuple = PyTuple_New(size); + if (tuple == NULL) + goto onError; + + for (i = 0; i < size; i++) { + mxTagTableEntry *tagtableentry = &tagtable->entry[i]; + + /* Build tuple (tagobj, command, args, jne, je) */ + v = PyTuple_New(5); + if (v == NULL) + goto onError; + w = tagtableentry->tagobj; + if (w == NULL) + w = Py_None; + Py_INCREF(w); + PyTuple_SET_ITEM(v, 0, w); + PyTuple_SET_ITEM(v, 1, PyInt_FromLong(tagtableentry->cmd | + tagtableentry->flags)); + w = tagtableentry->args; + if (w == NULL) + w = Py_None; + Py_INCREF(w); + PyTuple_SET_ITEM(v, 2, w); + PyTuple_SET_ITEM(v, 3, PyInt_FromLong(tagtableentry->jne)); + PyTuple_SET_ITEM(v, 4, PyInt_FromLong(tagtableentry->je)); + if (PyErr_Occurred()) { + Py_DECREF(v); + goto onError; + } + PyTuple_SET_ITEM(tuple, i, v); + } + + return tuple; + + onError: + Py_XDECREF(tuple); + return NULL; +} + + +/* methods */ + +Py_C_Function( mxTagTable_compiled, + ".compiled()\n\n" + ) +{ + Py_NoArgsCheck(); + return mxTagTable_CompiledDefinition(self); + + onError: + return NULL; +} + +#ifdef COPY_PROTOCOL +Py_C_Function( mxTagTable_copy, + "copy([memo])\n\n" + "Return a new reference for the instance. This function\n" + "is used for the copy-protocol. 
Real copying doesn't take\n" + "place, since the instances are immutable.") +{ + PyObject *memo; + + Py_GetArg("|O",memo); + Py_INCREF(tagtable); + return (PyObject *)tagtable; + + onError: + return NULL; +} +#endif + +#undef tagtable + +/* --- slots --- */ + +static +PyObject *mxTagTable_Repr(mxTagTableObject *self) +{ + char t[100]; + + if (self->tabletype == MXTAGTABLE_STRINGTYPE) + sprintf(t,"", (long)self); + else if (self->tabletype == MXTAGTABLE_UNICODETYPE) + sprintf(t,"", (long)self); + else + sprintf(t,"", (long)self); + return PyString_FromString(t); +} + +static +PyMethodDef mxTagTable_Methods[] = +{ + Py_MethodListEntryNoArgs("compiled",mxTagTable_compiled), +#ifdef COPY_PROTOCOL + Py_MethodListEntry("__deepcopy__",mxTagTable_copy), + Py_MethodListEntry("__copy__",mxTagTable_copy), +#endif + {NULL,NULL} /* end of list */ +}; + +static +PyMemberDef mxTagTable_Members[] = { + {"definition",T_OBJECT_EX,offsetof(mxTagTableObject,definition),READONLY,"Definition"}, + {NULL} +}; + +/* Python Type Tables */ + +PyTypeObject mxTagTable_Type = { + PyVarObject_HEAD_INIT(NULL, 0) /* init at startup ! */ + "Tag Table", /* tp_name */ + sizeof(mxTagTableObject), /* tp_basicsize */ + sizeof(mxTagTableEntry), /* tp_itemsize */ + /* methods */ + (destructor)mxTagTable_Free, /* tp_dealloc */ + (printfunc)0, /* tp_print */ + (getattrfunc)0, /* tp_getattr */ + (setattrfunc)0, /* tp_setattr */ + 0, /* tp_compare */ + (reprfunc)mxTagTable_Repr, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + (hashfunc)0, /* tp_hash */ + (ternaryfunc)0, /* tp_call */ + (reprfunc)0, /* tp_str */ + (getattrofunc)0, /* tp_getattro */ + (setattrofunc)0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + (char*) 0, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + mxTagTable_Methods, /* tp_methods */ + mxTagTable_Members, /* tp_members */ +}; + +/* --- Internal functions ----------------------------------------------*/ + +#ifdef HAVE_UNICODE + +/* Same as mxTextTools_Join() for Unicode objects. 
*/ + +static +PyObject *mxTextTools_UnicodeJoin(PyObject *seq, + Py_ssize_t start, + Py_ssize_t stop, + PyObject *separator) +{ + PyObject *newstring = 0, *tempstr = 0; + Py_ssize_t newstring_len,current_len = 0; + Py_UNICODE *p; + Py_ssize_t i; + Py_UNICODE *sep; + Py_ssize_t sep_len; + + if (separator) { + separator = PyUnicode_FromObject(separator); + if (separator == NULL) + goto onError; + sep = PyUnicode_AS_UNICODE(separator); + sep_len = PyUnicode_GET_SIZE(separator); + } + else { + sep = NULL; + sep_len = 0; + } + + /* Create an empty new string */ + newstring_len = (10 + sep_len) * (stop - start); + newstring = PyUnicode_FromUnicode(NULL, newstring_len); + if (newstring == NULL) + goto onError; + p = PyUnicode_AS_UNICODE(newstring); + + /* Join with separator */ + for (i = start; i < stop; i++) { + register PyObject *o; + Py_UNICODE *st; + Py_ssize_t len_st; + + o = PySequence_GetItem(seq, i); + + if PyTuple_Check(o) { + /* Tuple entry: (string,l,r,[...]) */ + register Py_ssize_t l,r; + + /* parse tuple */ + Py_Assert((PyTuple_GET_SIZE(o) >= 3) && + PyInt_Check(PyTuple_GET_ITEM(o,1)) && + PyInt_Check(PyTuple_GET_ITEM(o,2)), + PyExc_TypeError, + "tuples must be of the format (string,l,r[,...])"); + tempstr = PyUnicode_FromObject(PyTuple_GET_ITEM(o,0)); + if (tempstr == NULL) + goto onError; + st = PyUnicode_AS_UNICODE(tempstr); + len_st = PyUnicode_GET_SIZE(tempstr); + l = PyInt_AS_LONG(PyTuple_GET_ITEM(o,1)); + r = PyInt_AS_LONG(PyTuple_GET_ITEM(o,2)); + + /* compute slice */ + if (r > len_st) r = len_st; + else if (r < 0) { + r += len_st + 1; + if (r < 0) + r = 0; + } + if (l > len_st) l = len_st; + else if (l < 0) { + l += len_st + 1; + if (l < 0) + l = 0; + } + + /* empty ? */ + if (l > r) + continue; + len_st = r - l; + if (len_st == 0) + continue; + + /* get pointer right */ + st += l; + } + else { + /* Must be a string entry: take the whole string */ + tempstr = PyUnicode_FromObject(o); + if (tempstr == NULL) + goto onError; + st = PyUnicode_AS_UNICODE(tempstr); + len_st = PyUnicode_GET_SIZE(tempstr); + } + + Py_DECREF(o); + + /* Resize the new string if needed */ + while (current_len + len_st + sep_len >= newstring_len) { + newstring_len += newstring_len >> 1; + if (PyUnicode_Resize(&newstring, newstring_len)) + goto onError; + p = PyUnicode_AS_UNICODE(newstring) + current_len; + } + + /* Insert separator */ + if (i > 0 && sep_len > 0) { + Py_UNICODE_COPY(p, sep, sep_len); + p += sep_len; + current_len += sep_len; + } + + /* Copy snippet into new string */ + Py_UNICODE_COPY(p, st, len_st); + p += len_st; + current_len += len_st; + + Py_DECREF(tempstr); + tempstr = NULL; + } + + /* Resize new string to the actual length */ + if (PyUnicode_Resize(&newstring, current_len)) + goto onError; + + Py_XDECREF(separator); + return newstring; + + onError: + Py_XDECREF(newstring); + Py_XDECREF(separator); + Py_XDECREF(tempstr); + return NULL; +} + +#endif + +/* Enhanced string join: also excepts tuple (text, left, right,...) + entries which then cause text[left:right] to be used as string + snippet. + + separator may be NULL; in that case, "" is used as separator. 
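   Seen from the Python join() wrapper defined further below, e.g.:

       join(['Hello', ' ', ('World!xyz', 0, 6)])   # -> 'Hello World!'
       join([('Example', 0, -1)])                  # -> 'Example'

   Note the second example: as the wrapper's docstring points out, -1
   addresses the position one past the end of the snippet, not the last
   character as in standard Python slicing.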
+ +*/ + +static +PyObject *mxTextTools_Join(PyObject *seq, + Py_ssize_t start, + Py_ssize_t stop, + PyObject *separator) +{ + PyObject *newstring = 0; + Py_ssize_t newstring_len, current_len = 0; + char *p; + Py_ssize_t i; + char *sep; + Py_ssize_t sep_len; + + if (separator) { +#ifdef HAVE_UNICODE + if (PyUnicode_Check(separator)) + return mxTextTools_UnicodeJoin(seq, start, stop, separator); +#endif + Py_Assert(PyString_Check(separator), + PyExc_TypeError, + "separator must be a string"); + sep = PyString_AS_STRING(separator); + sep_len = PyString_GET_SIZE(separator); + } + else { + sep = NULL; + sep_len = 0; + } + + /* Create an empty new string */ + newstring_len = (10 + sep_len) * (stop - start); + newstring = PyString_FromStringAndSize((char*)NULL, newstring_len); + if (newstring == NULL) + goto onError; + p = PyString_AS_STRING(newstring); + + /* Join with separator */ + for (i = start; i < stop; i++) { + register PyObject *o; + char *st; + Py_ssize_t len_st; + + o = PySequence_GetItem(seq, i); + + if PyTuple_Check(o) { + /* Tuple entry: (string,l,r,[...]) */ + register Py_ssize_t l,r; + + /* parse tuple */ + Py_Assert((PyTuple_GET_SIZE(o) >= 3) && + PyInt_Check(PyTuple_GET_ITEM(o,1)) && + PyInt_Check(PyTuple_GET_ITEM(o,2)), + PyExc_TypeError, + "tuples must be of the format (string,int,int[,...])"); +#ifdef HAVE_UNICODE + if (PyUnicode_Check(PyTuple_GET_ITEM(o,0))) { + /* Redirect to Unicode implementation; all previous work + is lost. */ + Py_DECREF(o); + Py_DECREF(newstring); + return mxTextTools_UnicodeJoin(seq, start, stop, separator); + } +#endif + Py_Assert(PyString_Check(PyTuple_GET_ITEM(o,0)), + PyExc_TypeError, + "tuples must be of the format (string,int,int[,...])"); + st = PyString_AS_STRING(PyTuple_GET_ITEM(o,0)); + len_st = PyString_GET_SIZE(PyTuple_GET_ITEM(o,0)); + l = PyInt_AS_LONG(PyTuple_GET_ITEM(o,1)); + r = PyInt_AS_LONG(PyTuple_GET_ITEM(o,2)); + + /* compute slice */ + if (r > len_st) r = len_st; + else if (r < 0) { + r += len_st + 1; + if (r < 0) + r = 0; + } + if (l > len_st) l = len_st; + else if (l < 0) { + l += len_st + 1; + if (l < 0) + l = 0; + } + + /* empty ? */ + if (l > r) + continue; + len_st = r - l; + if (len_st == 0) + continue; + + /* get pointer right */ + st += l; + } + else if (PyString_Check(o)) { + /* String entry: take the whole string */ + st = PyString_AS_STRING(o); + len_st = PyString_GET_SIZE(o); + } +#ifdef HAVE_UNICODE + else if (PyUnicode_Check(o)) { + /* Redirect to Unicode implementation; all previous work + is lost. 
*/ + Py_DECREF(o); + Py_DECREF(newstring); + return mxTextTools_UnicodeJoin(seq, start, stop, separator); + } +#endif + else { + Py_DECREF(o); + Py_Error(PyExc_TypeError, + "list must contain tuples or strings as entries"); + } + + Py_DECREF(o); + + /* Resize the new string if needed */ + while (current_len + len_st + sep_len >= newstring_len) { + newstring_len += newstring_len >> 1; + if (_PyString_Resize(&newstring, newstring_len)) + goto onError; + p = PyString_AS_STRING(newstring) + current_len; + } + + /* Insert separator */ + if (i > 0 && sep_len > 0) { + memcpy(p, sep, sep_len); + p += sep_len; + current_len += sep_len; + } + + /* Copy snippet into new string */ + memcpy(p,st,len_st); + p += len_st; + current_len += len_st; + } + + /* Resize new string to the actual length */ + if (_PyString_Resize(&newstring, current_len)) + goto onError; + + return newstring; + + onError: + Py_XDECREF(newstring); + return NULL; +} + +static +PyObject *mxTextTools_HexStringFromString(char *str, + Py_ssize_t len) +{ + PyObject *w = 0; + Py_ssize_t i; + char *hex; + static const char hexdigits[] = "0123456789abcdef"; + + /* Convert to HEX */ + w = PyString_FromStringAndSize(NULL,2*len); + if (!w) + goto onError; + hex = PyString_AS_STRING(w); + for (i = 0; i < len; i ++) { + unsigned char c = (unsigned char)*str; + + *hex++ = hexdigits[c >> 4]; + *hex++ = hexdigits[c & 0x0F]; + str++; + } + return w; + + onError: + Py_XDECREF(w); + return NULL; +} + +static +PyObject *mxTextTools_StringFromHexString(char *hex, + Py_ssize_t len) +{ + PyObject *w = 0; + Py_ssize_t i; + char *str; + static const char hexdigits[] = "0123456789abcdef"; + + /* Convert to string */ + Py_Assert(len % 2 == 0, + PyExc_TypeError, + "need 2-digit hex string argument"); + len >>= 1; + w = PyString_FromStringAndSize(NULL,len); + if (!w) + goto onError; + str = PyString_AS_STRING(w); + for (i = 0; i < len; i++,str++) { + register char c; + register Py_ssize_t j; + + c = tolower(*hex++); + for (j = 0; j < (Py_ssize_t)sizeof(hexdigits); j++) + if (c == hexdigits[j]) { + *str = j << 4; + break; + } + if (j == sizeof(hexdigits)) { + DPRINTF("Failed: '%c' (%u) at %i\n",c,(unsigned int)c,i); + Py_Error(PyExc_ValueError, + "argument contains non-hex characters"); + } + + c = tolower(*hex++); + for (j = 0; j < (Py_ssize_t)sizeof(hexdigits); j++) + if (c == hexdigits[j]) { + *str += j; + break; + } + if (j == sizeof(hexdigits)) { + DPRINTF("Failed2: '%c' (%u) at %i\n",c,(unsigned int)c,i); + Py_Error(PyExc_ValueError, + "argument contains non-hex characters"); + } + } + return w; + + onError: + Py_XDECREF(w); + return NULL; +} + +static +int mxTextTools_IsASCII(PyObject *text, + Py_ssize_t left, + Py_ssize_t right) +{ + if (PyString_Check(text)) { + Py_ssize_t len; + register Py_ssize_t i; + register unsigned char *str = (unsigned char *)PyString_AS_STRING(text); + + len = PyString_GET_SIZE(text); + Py_CheckSequenceSlice(len, left, right); + for (i = left; i < right; i++) + if (str[i] >= 128) + return 0; + return 1; + } + +#ifdef HAVE_UNICODE + else if (PyUnicode_Check(text)) { + Py_ssize_t len; + register Py_ssize_t i; + register Py_UNICODE *str = PyUnicode_AS_UNICODE(text); + + len = PyUnicode_GET_SIZE(text); + Py_CheckSequenceSlice(len, left, right); + for (i = left; i < right; i++) + if (str[i] >= 128) + return 0; + return 1; + } +#endif + + else + Py_Error(PyExc_TypeError, + "need string object"); + + onError: + return -1; +} + +/* Takes a list of tuples (replacement,l,r,...) 
and produces a taglist + suitable for mxTextTools_Join() which creates a copy of + text where every slice [l:r] is replaced by the given replacement. + +*/ + +static +PyObject *mxTextTools_Joinlist(PyObject *text, + PyObject *list, + Py_ssize_t pos, + Py_ssize_t text_len) +{ + PyObject *joinlist = 0; + Py_ssize_t list_len; + Py_ssize_t i; + Py_ssize_t listitem = 0; + Py_ssize_t listsize = INITIAL_LIST_SIZE; + + if (PyString_Check(text)) { + Py_CheckStringSlice(text, pos, text_len); + } +#ifdef HAVE_UNICODE + else if (PyUnicode_Check(text)) { + Py_CheckUnicodeSlice(text, pos, text_len); + } +#endif + else + Py_Error(PyExc_TypeError, + "expected string or unicode"); + + Py_Assert(PyList_Check(list), + PyExc_TypeError, + "expected a list of tuples as second argument"); + list_len = PyList_GET_SIZE(list); + + joinlist = PyList_New(listsize); + if (joinlist == NULL) + goto onError; + + for (i = 0; i < list_len; i++) { + register PyObject *t; + register Py_ssize_t left, right; + + t = PyList_GET_ITEM(list, i); + Py_Assert(PyTuple_Check(t) && + (PyTuple_GET_SIZE(t) >= 3) && + (PyString_Check(PyTuple_GET_ITEM(t,0)) || + PyUnicode_Check(PyTuple_GET_ITEM(t,0))) && + PyInt_Check(PyTuple_GET_ITEM(t,1)) && + PyInt_Check(PyTuple_GET_ITEM(t,2)), + PyExc_TypeError, + "tuples must be of the form (string,int,int,...)"); + left = PyInt_AS_LONG(PyTuple_GET_ITEM(t,1)); + right = PyInt_AS_LONG(PyTuple_GET_ITEM(t,2)); + + Py_Assert(left >= pos, + PyExc_ValueError, + "list is not sorted ascending"); + + if (left > pos) { /* joinlist.append((text,pos,left)) */ + register PyObject *v; + register PyObject *w; + + v = PyTuple_New(3); + if (v == NULL) + goto onError; + + Py_INCREF(text); + PyTuple_SET_ITEM(v,0,text); + + w = PyInt_FromLong(pos); + if (w == NULL) + goto onError; + PyTuple_SET_ITEM(v,1,w); + + w = PyTuple_GET_ITEM(t,1); + Py_INCREF(w); + PyTuple_SET_ITEM(v,2,w); + + if (listitem < listsize) + PyList_SET_ITEM(joinlist,listitem,v); + else { + PyList_Append(joinlist,v); + Py_DECREF(v); + } + listitem++; + } + + /* joinlist.append(string) */ + if (listitem < listsize) { + register PyObject *v = PyTuple_GET_ITEM(t,0); + Py_INCREF(v); + PyList_SET_ITEM(joinlist,listitem,v); + } + else + PyList_Append(joinlist,PyTuple_GET_ITEM(t,0)); + listitem++; + + pos = right; + } + + if (pos < text_len) { /* joinlist.append((text,pos,text_len)) */ + register PyObject *v; + register PyObject *w; + + v = PyTuple_New(3); + if (v == NULL) + goto onError; + + Py_INCREF(text); + PyTuple_SET_ITEM(v,0,text); + + w = PyInt_FromLong(pos); + if (w == NULL) + goto onError; + PyTuple_SET_ITEM(v,1,w); + + w = PyInt_FromLong(text_len); + if (w == NULL) + goto onError; + PyTuple_SET_ITEM(v,2,w); + + if (listitem < listsize) + PyList_SET_ITEM(joinlist,listitem,v); + else { + PyList_Append(joinlist,v); + Py_DECREF(v); + } + listitem++; + } + + /* Resize list if necessary */ + if (listitem < listsize) + PyList_SetSlice(joinlist,listitem,listsize,(PyObject*)NULL); + + return joinlist; + + onError: + + Py_XDECREF(joinlist); + return NULL; +} + +#ifdef HAVE_UNICODE +static +PyObject *mxTextTools_UnicodeCharSplit(PyObject *text, + PyObject *separator, + Py_ssize_t start, + Py_ssize_t text_len) +{ + PyObject *list = NULL; + register Py_ssize_t x; + Py_ssize_t listitem = 0; + Py_ssize_t listsize = INITIAL_LIST_SIZE; + Py_UNICODE *tx; + Py_UNICODE sep; + + text = PyUnicode_FromObject(text); + if (text == NULL) { + separator = NULL; + goto onError; + } + separator = PyUnicode_FromObject(separator); + if (separator == NULL) + goto onError; + + 
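/* Typical use of the joinlist/join pair implemented above, seen from
   their Python wrappers (a sketch; the wrappers themselves are defined
   near the end of this file):

       text = 'a-b-c'
       jl = joinlist(text, [('+', 1, 2), ('+', 3, 4)])
       join(jl)                                    # -> 'a+b+c'

   Each (replacement, l, r) tuple replaces text[l:r]; the untouched
   regions are carried along as (text, l, r) references, so no copying
   happens until join() assembles the result. */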
Py_CheckUnicodeSlice(text, start, text_len); + + Py_Assert(PyUnicode_GET_SIZE(separator) == 1, + PyExc_TypeError, + "separator must be a single character"); + + tx = PyUnicode_AS_UNICODE(text); + sep = *PyUnicode_AS_UNICODE(separator); + + list = PyList_New(listsize); + if (!list) + goto onError; + + x = start; + while (1) { + PyObject *s; + register Py_ssize_t z; + + /* Skip to next separator */ + z = x; + for (;x < text_len; x++) + if (tx[x] == sep) + break; + + /* Append the slice to list */ + s = PyUnicode_FromUnicode(&tx[z], x - z); + if (!s) + goto onError; + if (listitem < listsize) + PyList_SET_ITEM(list,listitem,s); + else { + PyList_Append(list,s); + Py_DECREF(s); + } + listitem++; + + if (x == text_len) + break; + + /* Skip separator */ + x++; + } + + /* Resize list if necessary */ + if (listitem < listsize) + PyList_SetSlice(list,listitem,listsize,(PyObject*)NULL); + + Py_DECREF(text); + Py_DECREF(separator); + return list; + + onError: + Py_XDECREF(list); + Py_XDECREF(text); + Py_XDECREF(separator); + return NULL; +} +#endif + +static +PyObject *mxTextTools_CharSplit(PyObject *text, + PyObject *separator, + Py_ssize_t start, + Py_ssize_t text_len) +{ + PyObject *list = 0; + register Py_ssize_t x; + Py_ssize_t listitem = 0; + Py_ssize_t listsize = INITIAL_LIST_SIZE; + char *tx; + char sep; + +#ifdef HAVE_UNICODE + if (PyUnicode_Check(text) || PyUnicode_Check(separator)) + return mxTextTools_UnicodeCharSplit(text, separator, + start, text_len); +#endif + + if (PyString_Check(text) && PyString_Check(separator)) { + Py_CheckStringSlice(text, start, text_len); + } + else + Py_Error(PyExc_TypeError, + "text and separator must be strings or unicode"); + + Py_Assert(PyString_GET_SIZE(separator) == 1, + PyExc_TypeError, + "separator must be a single character"); + + tx = PyString_AS_STRING(text); + sep = *PyString_AS_STRING(separator); + + list = PyList_New(listsize); + if (!list) + goto onError; + + x = start; + while (1) { + PyObject *s; + register Py_ssize_t z; + + /* Skip to next separator */ + z = x; + for (;x < text_len; x++) + if (tx[x] == sep) + break; + + /* Append the slice to list */ + s = PyString_FromStringAndSize(&tx[z], x - z); + if (!s) + goto onError; + if (listitem < listsize) + PyList_SET_ITEM(list,listitem,s); + else { + PyList_Append(list,s); + Py_DECREF(s); + } + listitem++; + + if (x == text_len) + break; + + /* Skip separator */ + x++; + } + + /* Resize list if necessary */ + if (listitem < listsize) + PyList_SetSlice(list,listitem,listsize,(PyObject*)NULL); + + return list; + + onError: + Py_XDECREF(list); + return NULL; +} + +#ifdef HAVE_UNICODE +static +PyObject *mxTextTools_UnicodeSplitAt(PyObject *text, + PyObject *separator, + Py_ssize_t nth, + Py_ssize_t start, + Py_ssize_t text_len) +{ + PyObject *tuple = 0; + register Py_ssize_t x; + PyObject *s; + Py_UNICODE *tx; + Py_UNICODE sep; + + text = PyUnicode_FromObject(text); + if (text == NULL) { + separator = NULL; + goto onError; + } + separator = PyUnicode_FromObject(separator); + if (separator == NULL) + goto onError; + + Py_CheckUnicodeSlice(text, start, text_len); + + Py_Assert(PyUnicode_GET_SIZE(separator) == 1, + PyExc_TypeError, + "separator must be a single character"); + + tx = PyUnicode_AS_UNICODE(text); + sep = *PyUnicode_AS_UNICODE(separator); + + tuple = PyTuple_New(2); + if (!tuple) + goto onError; + + if (nth > 0) { + /* Skip to nth separator from the left */ + x = start; + while (1) { + for (; x < text_len; x++) + if (tx[x] == sep) + break; + if (--nth == 0 || x == text_len) + break; + x++; 
+ } + } + else if (nth < 0) { + /* Skip to nth separator from the right */ + x = text_len - 1; + while (1) { + for (; x >= start; x--) + if (tx[x] == sep) + break; + if (++nth == 0 || x < start) + break; + x--; + } + } + else + Py_Error(PyExc_ValueError, + "nth must be non-zero"); + + /* Add to tuple */ + if (x < start) + s = PyUnicode_FromUnicode((Py_UNICODE *)"", 0); + else + s = PyUnicode_FromUnicode(&tx[start], x - start); + if (!s) + goto onError; + PyTuple_SET_ITEM(tuple,0,s); + + /* Skip separator */ + x++; + + if (x >= text_len) + s = PyUnicode_FromUnicode((Py_UNICODE *)"", 0); + else + s = PyUnicode_FromUnicode(&tx[x], text_len - x); + if (!s) + goto onError; + PyTuple_SET_ITEM(tuple,1,s); + + Py_DECREF(text); + Py_DECREF(separator); + return tuple; + + onError: + Py_XDECREF(tuple); + Py_XDECREF(text); + Py_XDECREF(separator); + return NULL; +} +#endif + +static +PyObject *mxTextTools_SplitAt(PyObject *text, + PyObject *separator, + Py_ssize_t nth, + Py_ssize_t start, + Py_ssize_t text_len) +{ + PyObject *tuple = 0; + register Py_ssize_t x; + PyObject *s; + char *tx; + char sep; + +#ifdef HAVE_UNICODE + if (PyUnicode_Check(text) || PyUnicode_Check(separator)) + return mxTextTools_UnicodeSplitAt(text, separator, + nth, start, text_len); +#endif + + if (PyString_Check(text) && PyString_Check(separator)) { + Py_CheckStringSlice(text, start, text_len); + } + else + Py_Error(PyExc_TypeError, + "text and separator must be strings or unicode"); + + Py_Assert(PyString_GET_SIZE(separator) == 1, + PyExc_TypeError, + "separator must be a single character"); + + tx = PyString_AS_STRING(text); + sep = *PyString_AS_STRING(separator); + + tuple = PyTuple_New(2); + if (!tuple) + goto onError; + + if (nth > 0) { + /* Skip to nth separator from the left */ + x = start; + while (1) { + for (; x < text_len; x++) + if (tx[x] == sep) + break; + if (--nth == 0 || x == text_len) + break; + x++; + } + } + else if (nth < 0) { + /* Skip to nth separator from the right */ + x = text_len - 1; + while (1) { + for (; x >= start; x--) + if (tx[x] == sep) + break; + if (++nth == 0 || x < start) + break; + x--; + } + } + else + Py_Error(PyExc_ValueError, + "nth must be non-zero"); + + /* Add to tuple */ + if (x < start) + s = PyString_FromStringAndSize("",0); + else + s = PyString_FromStringAndSize(&tx[start], x - start); + if (!s) + goto onError; + PyTuple_SET_ITEM(tuple,0,s); + + /* Skip separator */ + x++; + + if (x >= text_len) + s = PyString_FromStringAndSize("",0); + else + s = PyString_FromStringAndSize(&tx[x], text_len - x); + if (!s) + goto onError; + PyTuple_SET_ITEM(tuple,1,s); + + return tuple; + + onError: + Py_XDECREF(tuple); + return NULL; +} + +#ifdef HAVE_UNICODE +static +PyObject *mxTextTools_UnicodeSuffix(PyObject *text, + PyObject *suffixes, + Py_ssize_t start, + Py_ssize_t text_len, + PyObject *translate) +{ + Py_ssize_t i; + Py_UNICODE *tx; + + text = PyUnicode_FromObject(text); + if (text == NULL) + goto onError; + + if (PyUnicode_Check(text)) { + Py_CheckUnicodeSlice(text, start, text_len); + } + else + Py_Error(PyExc_TypeError, + "expected unicode"); + Py_Assert(PyTuple_Check(suffixes), + PyExc_TypeError, + "suffixes needs to be a tuple of unicode strings"); + + /* XXX Add support for translate... 
*/ + Py_Assert(translate == NULL, + PyExc_TypeError, + "translate is not supported for Unicode suffix()es"); + + tx = PyUnicode_AS_UNICODE(text); + + for (i = 0; i < PyTuple_GET_SIZE(suffixes); i++) { + PyObject *suffix = PyTuple_GET_ITEM(suffixes,i); + Py_ssize_t start_cmp; + + suffix = PyUnicode_FromObject(suffix); + if (suffix == NULL) + goto onError; + + start_cmp = text_len - PyUnicode_GET_SIZE(suffix); + if (start_cmp >= start && + PyUnicode_AS_UNICODE(suffix)[0] == tx[start_cmp] && + memcmp(PyUnicode_AS_UNICODE(suffix), + &tx[start_cmp], + PyUnicode_GET_DATA_SIZE(suffix)) == 0) { + Py_DECREF(text); + return suffix; + } + + Py_DECREF(suffix); + } + + Py_DECREF(text); + Py_ReturnNone(); + + onError: + Py_XDECREF(text); + return NULL; +} +#endif + +static +PyObject *mxTextTools_Suffix(PyObject *text, + PyObject *suffixes, + Py_ssize_t start, + Py_ssize_t text_len, + PyObject *translate) +{ + Py_ssize_t i; + char *tx; + +#ifdef HAVE_UNICODE + if (PyUnicode_Check(text)) + return mxTextTools_UnicodeSuffix(text, suffixes, + start, text_len, + translate); +#endif + + if (PyString_Check(text)) { + Py_CheckStringSlice(text, start, text_len); + } + else + Py_Error(PyExc_TypeError, + "expected string or unicode"); + Py_Assert(PyTuple_Check(suffixes), + PyExc_TypeError, + "suffixes needs to be a tuple of strings"); + tx = PyString_AS_STRING(text); + + if (translate) { + char *tr; + + Py_Assert(PyString_Check(translate) && + PyString_GET_SIZE(translate) == 256, + PyExc_TypeError, + "translate must be a string having 256 characters"); + tr = PyString_AS_STRING(translate); + + for (i = 0; i < PyTuple_GET_SIZE(suffixes); i++) { + PyObject *suffix = PyTuple_GET_ITEM(suffixes, i); + Py_ssize_t start_cmp; + register char *s; + register char *t; + register Py_ssize_t j; + + Py_AssertWithArg(PyString_Check(suffix), + PyExc_TypeError, + "tuple entry %d is not a string",(unsigned int)i); + start_cmp = text_len - PyString_GET_SIZE(suffix); + if (start_cmp < start) + continue; + + /* Do the compare using a translate table */ + s = PyString_AS_STRING(suffix); + t = tx + start_cmp; + for (j = start_cmp; j < text_len; j++, s++, t++) + if (*s != tr[(unsigned char)*t]) + break; + if (j == text_len) { + Py_INCREF(suffix); + return suffix; + } + } + } + + else + for (i = 0; i < PyTuple_GET_SIZE(suffixes); i++) { + PyObject *suffix = PyTuple_GET_ITEM(suffixes,i); + Py_ssize_t start_cmp; + + Py_AssertWithArg(PyString_Check(suffix), + PyExc_TypeError, + "tuple entry %d is not a string",(unsigned int)i); + start_cmp = text_len - PyString_GET_SIZE(suffix); + if (start_cmp < start) + continue; + + /* Compare without translate table */ + if (PyString_AS_STRING(suffix)[0] == tx[start_cmp] + && + strncmp(PyString_AS_STRING(suffix), + &tx[start_cmp], + PyString_GET_SIZE(suffix)) == 0) { + Py_INCREF(suffix); + return suffix; + } + } + + Py_ReturnNone(); + + onError: + return NULL; +} + +#ifdef HAVE_UNICODE +static +PyObject *mxTextTools_UnicodePrefix(PyObject *text, + PyObject *prefixes, + Py_ssize_t start, + Py_ssize_t text_len, + PyObject *translate) +{ + Py_ssize_t i; + Py_UNICODE *tx; + + text = PyUnicode_FromObject(text); + if (text == NULL) + goto onError; + + if (PyUnicode_Check(text)) { + Py_CheckUnicodeSlice(text, start, text_len); + } + else + Py_Error(PyExc_TypeError, + "expected unicode"); + Py_Assert(PyTuple_Check(prefixes), + PyExc_TypeError, + "prefixes needs to be a tuple of unicode strings"); + + /* XXX Add support for translate... 
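   At the Python level these helpers behave roughly as follows (a sketch;
   prefix() and suffix() are the assumed wrapper names, which are not
   part of this hunk):

       prefix('foobar', ('bar', 'foo'))   # -> 'foo', first tuple entry that matches
       suffix('foobar', ('bar', 'foo'))   # -> 'bar'
       prefix('foobar', ('baz',))         # -> None

   The optional translate table is honoured only for plain 8-bit strings;
   for Unicode input it must be None, as asserted just below.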
*/ + Py_Assert(translate == NULL, + PyExc_TypeError, + "translate is not supported for Unicode prefix()es"); + + tx = PyUnicode_AS_UNICODE(text); + + for (i = 0; i < PyTuple_GET_SIZE(prefixes); i++) { + PyObject *prefix = PyTuple_GET_ITEM(prefixes,i); + + prefix = PyUnicode_FromObject(prefix); + if (prefix == NULL) + goto onError; + + /* Compare without translate table */ + if (start + PyString_GET_SIZE(prefix) <= text_len && + PyUnicode_AS_UNICODE(prefix)[0] == tx[start] && + memcmp(PyUnicode_AS_UNICODE(prefix), + &tx[start], + PyUnicode_GET_DATA_SIZE(prefix)) == 0) { + Py_INCREF(prefix); + return prefix; + } + + Py_DECREF(prefix); + } + + Py_DECREF(text); + Py_ReturnNone(); + + onError: + Py_XDECREF(text); + return NULL; +} +#endif + +static +PyObject *mxTextTools_Prefix(PyObject *text, + PyObject *prefixes, + Py_ssize_t start, + Py_ssize_t text_len, + PyObject *translate) +{ + Py_ssize_t i; + char *tx; + +#ifdef HAVE_UNICODE + if (PyUnicode_Check(text)) + return mxTextTools_UnicodePrefix(text, prefixes, + start, text_len, + translate); +#endif + + if (PyString_Check(text)) { + Py_CheckStringSlice(text, start, text_len); + } + else + Py_Error(PyExc_TypeError, + "expected string or unicode"); + Py_Assert(PyTuple_Check(prefixes), + PyExc_TypeError, + "prefixes needs to be a tuple of strings"); + tx = PyString_AS_STRING(text); + + if (translate) { + char *tr; + + Py_Assert(PyString_Check(translate) && + PyString_GET_SIZE(translate) == 256, + PyExc_TypeError, + "translate must be a string having 256 characters"); + tr = PyString_AS_STRING(translate); + + for (i = 0; i < PyTuple_GET_SIZE(prefixes); i++) { + PyObject *prefix = PyTuple_GET_ITEM(prefixes,i); + Py_ssize_t cmp_len; + register char *s; + register char *t; + register Py_ssize_t j; + + Py_AssertWithArg(PyString_Check(prefix), + PyExc_TypeError, + "tuple entry %d is not a string",(unsigned int)i); + cmp_len = PyString_GET_SIZE(prefix); + if (start + cmp_len > text_len) + continue; + + /* Do the compare using a translate table */ + s = PyString_AS_STRING(prefix); + t = tx + start; + for (j = 0; j < cmp_len; j++, s++, t++) + if (*s != tr[(unsigned char)*t]) + break; + if (j == cmp_len) { + Py_INCREF(prefix); + return prefix; + } + } + } + + else + for (i = 0; i < PyTuple_GET_SIZE(prefixes); i++) { + PyObject *prefix = PyTuple_GET_ITEM(prefixes,i); + + Py_AssertWithArg(PyString_Check(prefix), + PyExc_TypeError, + "tuple entry %d is not a string",(unsigned int)i); + if (start + PyString_GET_SIZE(prefix) > text_len) + continue; + + /* Compare without translate table */ + if (PyString_AS_STRING(prefix)[0] == tx[start] && + strncmp(PyString_AS_STRING(prefix), + &tx[start], + PyString_GET_SIZE(prefix)) == 0) { + Py_INCREF(prefix); + return prefix; + } + } + + Py_ReturnNone(); + + onError: + return NULL; +} + +/* Stips off characters appearing in the character set from text[start:stop] + and returns the result as Python string object. 
+ + where indicates the mode: + where < 0: strip left only + where = 0: strip left and right + where > 0: strip right only + +*/ +static +PyObject *mxTextTools_SetStrip(char *tx, + Py_ssize_t tx_len, + char *setstr, + Py_ssize_t setstr_len, + Py_ssize_t start, + Py_ssize_t stop, + Py_ssize_t where) +{ + Py_ssize_t left, right; + + Py_Assert(setstr_len == 32, + PyExc_TypeError, + "separator needs to be a set as obtained from set()"); + Py_CheckBufferSlice(tx_len, start, stop); + + /* Strip left */ + if (where <= 0) { + register Py_ssize_t x; + for (x = start; x < stop; x++) + if (!Py_CharInSet(tx[x], setstr)) + break; + left = x; + } + else + left = start; + + /* Strip right */ + if (where >= 0) { + register Py_ssize_t x; + for (x = stop - 1; x >= start; x--) + if (!Py_CharInSet(tx[x], setstr)) + break; + right = x + 1; + } + else + right = stop; + + return PyString_FromStringAndSize(tx + left, max(right - left, 0)); + + onError: + return NULL; +} + +static +PyObject *mxTextTools_SetSplit(char *tx, + Py_ssize_t tx_len, + char *setstr, + Py_ssize_t setstr_len, + Py_ssize_t start, + Py_ssize_t text_len) +{ + PyObject *list = NULL; + register Py_ssize_t x; + Py_ssize_t listitem = 0; + Py_ssize_t listsize = INITIAL_LIST_SIZE; + + Py_Assert(setstr_len == 32, + PyExc_TypeError, + "separator needs to be a set as obtained from set()"); + Py_CheckBufferSlice(tx_len,start,text_len); + + list = PyList_New(listsize); + if (!list) + goto onError; + + x = start; + while (x < text_len) { + Py_ssize_t z; + + /* Skip all text in set */ + for (;x < text_len; x++) { + register Py_ssize_t c = (unsigned char)tx[x]; + register Py_ssize_t block = (unsigned char)setstr[c >> 3]; + if (!block || ((block & (1 << (c & 7))) == 0)) + break; + } + + /* Skip all text not in set */ + z = x; + for (;x < text_len; x++) { + register Py_ssize_t c = (unsigned char)tx[x]; + register Py_ssize_t block = (unsigned char)setstr[c >> 3]; + if (block && ((block & (1 << (c & 7))) != 0)) + break; + } + + /* Append the slice to list if it is not empty */ + if (x > z) { + PyObject *s; + s = PyString_FromStringAndSize((char *)&tx[z], x - z); + if (!s) + goto onError; + if (listitem < listsize) + PyList_SET_ITEM(list,listitem,s); + else { + PyList_Append(list,s); + Py_DECREF(s); + } + listitem++; + } + } + + /* Resize list if necessary */ + if (listitem < listsize) + PyList_SetSlice(list,listitem,listsize,(PyObject*)NULL); + + return list; + + onError: + Py_XDECREF(list); + return NULL; +} + +static +PyObject *mxTextTools_SetSplitX(char *tx, + Py_ssize_t tx_len, + char *setstr, + Py_ssize_t setstr_len, + Py_ssize_t start, + Py_ssize_t text_len) +{ + PyObject *list = NULL; + register Py_ssize_t x; + Py_ssize_t listitem = 0; + Py_ssize_t listsize = INITIAL_LIST_SIZE; + + Py_Assert(setstr_len == 32, + PyExc_TypeError, + "separator needs to be a set as obtained from set()"); + Py_CheckBufferSlice(tx_len,start,text_len); + + list = PyList_New(listsize); + if (!list) + goto onError; + + x = start; + while (x < text_len) { + PyObject *s; + register Py_ssize_t z; + + /* Skip all text not in set */ + z = x; + for (;x < text_len; x++) { + register unsigned int c = (unsigned char)tx[x]; + register unsigned int block = (unsigned char)setstr[c >> 3]; + if (block && ((block & (1 << (c & 7))) != 0)) + break; + } + + /* Append the slice to list */ + s = PyString_FromStringAndSize((char *)&tx[z], x - z); + if (!s) + goto onError; + if (listitem < listsize) + PyList_SET_ITEM(list,listitem,s); + else { + PyList_Append(list,s); + Py_DECREF(s); + } + listitem++; 
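/* The set-string helpers above operate on the 32-byte bit vectors that
   the set() builder referenced in the assertions produces.  A usage
   sketch, assuming the wrapper names setstrip/setsplit/setsplitx and
   that set() accepts a character string:

       digits = set('0123456789')          # 32-byte lookup string
       setstrip('  42  ', set(' '))        # -> '42'
       setsplit('a1b22c', digits)          # -> ['a', 'b', 'c']
       setsplitx('a1b22c', digits)         # -> ['a', '1', 'b', '22', 'c']
*/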
+ + if (x >= text_len) + break; + + /* Skip all text in set */ + z = x; + for (;x < text_len; x++) { + register unsigned int c = (unsigned char)tx[x]; + register unsigned int block = (unsigned char)setstr[c >> 3]; + if (!block || ((block & (1 << (c & 7))) == 0)) + break; + } + + /* Append the slice to list if it is not empty */ + s = PyString_FromStringAndSize((char *)&tx[z], x - z); + if (!s) + goto onError; + if (listitem < listsize) + PyList_SET_ITEM(list,listitem,s); + else { + PyList_Append(list,s); + Py_DECREF(s); + } + listitem++; + } + + /* Resize list if necessary */ + if (listitem < listsize) + PyList_SetSlice(list,listitem,listsize,(PyObject*)NULL); + + return list; + + onError: + Py_XDECREF(list); + return NULL; +} + +static +PyObject *mxTextTools_Upper(PyObject *text) +{ + PyObject *ntext; + register unsigned char *s; + register unsigned char *orig; + register Py_ssize_t i; + unsigned char *tr; + Py_ssize_t len; + + Py_Assert(PyString_Check(text), + PyExc_TypeError, + "expected a Python string"); + + len = PyString_GET_SIZE(text); + ntext = PyString_FromStringAndSize(NULL,len); + if (!ntext) + goto onError; + + /* Translate */ + tr = (unsigned char *)PyString_AS_STRING(mx_ToUpper); + orig = (unsigned char *)PyString_AS_STRING(text); + s = (unsigned char *)PyString_AS_STRING(ntext); + for (i = 0; i < len; i++, s++, orig++) + *s = tr[*orig]; + + return ntext; + + onError: + return NULL; +} + +#ifdef HAVE_UNICODE +static +PyObject *mxTextTools_UnicodeUpper(PyObject *text) +{ + PyObject *ntext; + register Py_UNICODE *s; + register Py_UNICODE *orig; + register Py_ssize_t i; + Py_ssize_t len; + + text = PyUnicode_FromObject(text); + if (text == NULL) + goto onError; + + len = PyUnicode_GET_SIZE(text); + ntext = PyUnicode_FromUnicode(NULL, len); + if (!ntext) + goto onError; + + /* Translate */ + orig = (Py_UNICODE *)PyUnicode_AS_UNICODE(text); + s = (Py_UNICODE *)PyUnicode_AS_UNICODE(ntext); + for (i = 0; i < len; i++, s++, orig++) + *s = Py_UNICODE_TOUPPER(*orig); + + Py_DECREF(text); + return ntext; + + onError: + Py_XDECREF(text); + return NULL; +} +#endif + +static +PyObject *mxTextTools_Lower(PyObject *text) +{ + PyObject *ntext; + register unsigned char *s; + register unsigned char *orig; + register Py_ssize_t i; + unsigned char *tr; + Py_ssize_t len; + + Py_Assert(PyString_Check(text), + PyExc_TypeError, + "expected a Python string"); + + len = PyString_GET_SIZE(text); + ntext = PyString_FromStringAndSize(NULL,len); + if (!ntext) + goto onError; + + /* Translate */ + tr = (unsigned char *)PyString_AS_STRING(mx_ToLower); + orig = (unsigned char *)PyString_AS_STRING(text); + s = (unsigned char *)PyString_AS_STRING(ntext); + for (i = 0; i < len; i++, s++, orig++) + *s = tr[*orig]; + + return ntext; + + onError: + return NULL; +} + +#ifdef HAVE_UNICODE +static +PyObject *mxTextTools_UnicodeLower(PyObject *text) +{ + PyObject *ntext; + register Py_UNICODE *s; + register Py_UNICODE *orig; + register Py_ssize_t i; + Py_ssize_t len; + + text = PyUnicode_FromObject(text); + if (text == NULL) + goto onError; + + len = PyUnicode_GET_SIZE(text); + ntext = PyUnicode_FromUnicode(NULL, len); + if (!ntext) + goto onError; + + /* Translate */ + orig = (Py_UNICODE *)PyUnicode_AS_UNICODE(text); + s = (Py_UNICODE *)PyUnicode_AS_UNICODE(ntext); + for (i = 0; i < len; i++, s++, orig++) + *s = Py_UNICODE_TOLOWER(*orig); + + Py_DECREF(text); + return ntext; + + onError: + Py_XDECREF(text); + return NULL; +} +#endif + +/* --- Module functions ------------------------------------------------*/ + +/* 
Interface to the tagging engine in mxte.c */ + +Py_C_Function_WithKeywords( + mxTextTools_tag, + "tag(text,tagtable,sliceleft=0,sliceright=len(text),taglist=[],context=None) \n""" + "Produce a tag list for a string, given a tag-table\n" + "- returns a tuple (success, taglist, nextindex)\n" + "- if taglist == None, then no taglist is created" + ) +{ + PyObject *text; + PyObject *tagtable; + Py_ssize_t sliceright = INT_MAX; + Py_ssize_t sliceleft = 0; + PyObject *taglist = 0; + Py_ssize_t taglist_len; + PyObject *context = 0; + Py_ssize_t next, result; + PyObject *res; + + Py_KeywordsGet6Args("OO|iiOO:tag", + text,tagtable,sliceleft,sliceright,taglist,context); + + if (taglist == NULL) { + /* not given, so use default: an empty list */ + taglist = PyList_New(0); + if (taglist == NULL) + goto onError; + taglist_len = 0; + } + else { + Py_INCREF(taglist); + Py_Assert(PyList_Check(taglist) || taglist == Py_None, + PyExc_TypeError, + "taglist must be a list or None"); + if (taglist != Py_None) { + taglist_len = PyList_Size(taglist); + if (taglist_len < 0) + goto onError; + } + else + taglist_len = 0; + } + + Py_Assert(mxTagTable_Check(tagtable) || + PyTuple_Check(tagtable) || + PyList_Check(tagtable), + PyExc_TypeError, + "tagtable must be a TagTable instance, list or tuple"); + + /* Prepare the argument for the Tagging Engine and let it process + the request */ + if (PyString_Check(text)) { + + Py_CheckStringSlice(text, sliceleft, sliceright); + + if (!mxTagTable_Check(tagtable)) { + tagtable = mxTagTable_New(tagtable, MXTAGTABLE_STRINGTYPE, 1); + if (tagtable == NULL) + goto onError; + } + else if (mxTagTable_Type(tagtable) != MXTAGTABLE_STRINGTYPE) { + Py_Error(PyExc_TypeError, + "TagTable instance is not intended for parsing strings"); + } + else + Py_INCREF(tagtable); + + /* Call the Tagging Engine */ + result = mxTextTools_TaggingEngine(text, + sliceleft, + sliceright, + (mxTagTableObject *)tagtable, + taglist, + context, + &next); + Py_DECREF(tagtable); + + } +#ifdef HAVE_UNICODE + else if (PyUnicode_Check(text)) { + + Py_CheckUnicodeSlice(text, sliceleft, sliceright); + + if (!mxTagTable_Check(tagtable)) { + tagtable = mxTagTable_New(tagtable, 1, 1); + if (tagtable == NULL) + goto onError; + } + else if (mxTagTable_Type(tagtable) != MXTAGTABLE_UNICODETYPE) { + Py_Error(PyExc_TypeError, + "TagTable instance is not intended for parsing Unicode"); + } + else + Py_INCREF(tagtable); + + /* Call the Tagging Engine */ + result = mxTextTools_UnicodeTaggingEngine(text, + sliceleft, + sliceright, + (mxTagTableObject *)tagtable, + taglist, + context, + &next); + Py_DECREF(tagtable); + + } +#endif + else + Py_Error(PyExc_TypeError, + "text must be a string or unicode"); + + /* Check for exceptions during matching */ + if (result == 0) + goto onError; + + /* Undo changes to taglist in case of a match failure (result == 1) */ + if (result == 1 && taglist != Py_None) { + DPRINTF(" undoing changes: del taglist[%i:%i]\n", + taglist_len, PyList_Size(taglist)); + if (PyList_SetSlice(taglist, + taglist_len, + PyList_Size(taglist), + NULL)) + goto onError; + } + + /* Convert result to the documented external values: + 0 - no match, 1 - match. 
*/ + result--; + + /* Build result tuple */ + res = PyTuple_New(3); + if (!res) + goto onError; + PyTuple_SET_ITEM(res,0,PyInt_FromLong(result)); + PyTuple_SET_ITEM(res,1,taglist); + PyTuple_SET_ITEM(res,2,PyInt_FromLong(next)); + return res; + + onError: + if (!PyErr_Occurred()) + Py_Error(PyExc_SystemError, + "NULL result without error in builtin tag()"); + Py_XDECREF(taglist); + return NULL; +} + +/* An extended version of string.join() for taglists: */ + +Py_C_Function( mxTextTools_join, + "join(joinlist,sep='',start=0,stop=len(joinlist))\n\n" + "Copy snippets from different strings together producing a\n" + "new string\n" + "The first argument must be a list of tuples or strings;\n" + "tuples must be of the form (string,l,r[,...]) and turn out\n" + "as string[l:r]\n" + "NOTE: the syntax used for negative slices is different\n" + "than the Python standard: -1 corresponds to the first\n" + "character *after* the string, e.g. ('Example',0,-1) gives\n" + "'Example' and not 'Exampl', like in Python\n" + "sep is an optional separator string, start and stop\n" + "define the slice of joinlist that is taken into accont." + ) +{ + PyObject *joinlist = NULL; + Py_ssize_t joinlist_len; + PyObject *separator = NULL; + Py_ssize_t start=0, stop=INT_MAX; + + Py_Get4Args("O|Oii:join", + joinlist,separator,start,stop); + + Py_Assert(PySequence_Check(joinlist), + PyExc_TypeError, + "first argument needs to be a sequence"); + + joinlist_len = PySequence_Length(joinlist); + Py_Assert(joinlist_len >= 0, + PyExc_TypeError, + "first argument needs to have a __len__ method"); + + Py_CheckSequenceSlice(joinlist_len, start, stop); + + /* Short-cut */ + if ((stop - start) <= 0) + return PyString_FromString(""); + + return mxTextTools_Join(joinlist, + start, stop, + separator); + + onError: + return NULL; +} + +/* + Special compare function for taglist-tuples, comparing + the text-slices given: + - slices starting at a smaller index come first + - for slices starting at the same index, the longer one + wins +*/ + +Py_C_Function( mxTextTools_cmp, + "cmp(a,b)\n\n" + "Compare two valid taglist tuples w/r to their slice\n" + "position; this is useful for sorting joinlists.") +{ + PyObject *v,*w; + short index; + int cmp; + + Py_Get2Args("OO:cmp",v,w); + + Py_Assert(PyTuple_Check(v) && PyTuple_Check(w) && + PyTuple_GET_SIZE(v) >= 3 && PyTuple_GET_SIZE(w) >= 3, + PyExc_TypeError, + "invalid taglist-tuple"); + + for (index = 1; index < 3; index++) { + cmp = PyObject_RichCompareBool(PyTuple_GET_ITEM(v,1),PyTuple_GET_ITEM(w,1),Py_LT); + if (cmp) + return PyInt_FromLong(cmp); + cmp = PyObject_RichCompareBool(PyTuple_GET_ITEM(v,2),PyTuple_GET_ITEM(w,2), Py_GT); + if (cmp) + return PyInt_FromLong(cmp); + } + return PyInt_FromLong(0); + + onError: + return NULL; +} + +Py_C_Function( mxTextTools_joinlist, + "joinlist(text,list,start=0,stop=len(text))\n\n" + "Takes a list of tuples (replacement,l,r,...) and produces\n" + "a taglist suitable for join() which creates a copy\n" + "of text where every slice [l:r] is replaced by the\n" + "given replacement\n" + "- the list must be sorted using cmp() as compare function\n" + "- it may not contain overlapping slices\n" + "- the slices may not contain negative indices\n" + "- if the taglist cannot contain overlapping slices, you can\n" + " give this function the taglist produced by tag() directly\n" + " (sorting is not needed, as the list will already be sorted)\n" + "- start and stop set the slice to work in, i.e. 
text[start:stop]" +) +{ + PyObject *list; + PyObject *text; + Py_ssize_t text_len = INT_MAX; + Py_ssize_t pos = 0; + + Py_Get4Args("OO|ii:joinlist",text,list,pos,text_len); + + return mxTextTools_Joinlist(text, list, pos, text_len); + + onError: + return NULL; +} + +Py_C_Function( mxTextTools_charsplit, + "charsplit(text,char,start=0,stop=len(text))\n\n" + "Split text[start:stop] into substrings at char and\n" + "return the result as list of strings." +) +{ + PyObject *text, *separator; + Py_ssize_t text_len = INT_MAX; + Py_ssize_t start = 0; + + Py_Get4Args("OO|ii:charsplit", + text,separator,start,text_len); + + return mxTextTools_CharSplit(text, separator, + start, text_len); + + onError: + return NULL; +} + +Py_C_Function( mxTextTools_splitat, + "splitat(text,char,nth=1,start=0,stop=len(text))\n\n" + "Split text[start:stop] into two substrings at the nth\n" + "occurance of char and return the result as 2-tuple. If the\n" + "character is not found, the second string is empty. nth may\n" + "be negative: the search is then done from the right and the\n" + "first string is empty in case the character is not found." +) +{ + PyObject *text, *separator; + Py_ssize_t text_len = INT_MAX; + Py_ssize_t start = 0; + Py_ssize_t nth = 1; + + Py_Get5Args("OO|iii:splitat", + text,separator,nth,start,text_len); + + return mxTextTools_SplitAt(text, separator, + nth, start, text_len); + onError: + return NULL; +} + +Py_C_Function( mxTextTools_suffix, + "suffix(text,suffixes,start=0,stop=len(text)[,translate])\n\n" + "Looks at text[start:stop] and returns the first matching\n" + "suffix out of the tuple of strings given in suffixes.\n" + "If no suffix is found to be matching, None is returned.\n" + "The optional 256 char translate string is used to translate\n" + "the text prior to comparing it with the given suffixes." + ) +{ + PyObject *text, *suffixes, *translate = NULL; + Py_ssize_t text_len = INT_MAX; + Py_ssize_t start = 0; + + Py_Get5Args("OO|iiO:suffix", + text,suffixes,start,text_len,translate); + + return mxTextTools_Suffix(text, + suffixes, + start, text_len, + translate); + onError: + return NULL; +} + +Py_C_Function( mxTextTools_prefix, + "prefix(text,prefixes,start=0,stop=len(text)[,translate])\n\n" + "Looks at text[start:stop] and returns the first matching\n" + "prefix out of the tuple of strings given in prefixes.\n" + "If no prefix is found to be matching, None is returned.\n" + "The optional 256 char translate string is used to translate\n" + "the text prior to comparing it with the given suffixes." 
+) +{ + PyObject *text, *prefixes, *translate = NULL; + Py_ssize_t text_len = INT_MAX; + Py_ssize_t start = 0; + + Py_Get5Args("OO|iiO:prefix", + text,prefixes,start,text_len,translate); + + return mxTextTools_Prefix(text, + prefixes, + start, text_len, + translate); + onError: + return NULL; +} + +Py_C_Function( mxTextTools_set, + "set(string,logic=1)\n\n" + "Returns a character set for string: a bit encoded version\n" + "of the characters occurring in string.\n" + "- logic can be set to 0 if all characters *not* in string\n" + " should go into the set") +{ + PyObject *sto; + char *s,*st; + Py_ssize_t len_s; + int logic = 1; + Py_ssize_t i; + + Py_Get3Args("s#|i:set", + s,len_s,logic); + + sto = PyString_FromStringAndSize(NULL,32); + if (sto == NULL) + goto onError; + + st = PyString_AS_STRING(sto); + + if (logic) { + memset(st,0x00,32); + for (i = 0; i < len_s; i++,s++) { + int j = (unsigned char)*s; + + st[j >> 3] |= 1 << (j & 7); + } + } + else { + memset(st,0xFF,32); + for (i = 0; i < len_s; i++,s++) { + int j = (unsigned char)*s; + + st[j >> 3] &= ~(1 << (j & 7)); + } + } + return sto; + + onError: + return NULL; +} + +Py_C_Function( mxTextTools_setfind, + "setfind(text,set,start=0,stop=len(text))\n\n" + "Find the first occurence of any character from set in\n" + "text[start:stop]\n set must be a string obtained with set()\n" + "DEPRECATED: use CharSet().search() instead." +) +{ + PyObject *text; + PyObject *set; + Py_ssize_t text_len = INT_MAX; + Py_ssize_t start = 0; + register Py_ssize_t x; + register char *tx; + register unsigned char *setstr; + + Py_Get4Args("OO|ii:setfind",text,set,start,text_len); + + Py_Assert(PyString_Check(text), + PyExc_TypeError, + "first argument needs to be a string"); + Py_Assert(PyString_Check(set) && PyString_GET_SIZE(set) == 32, + PyExc_TypeError, + "second argument needs to be a set"); + Py_CheckStringSlice(text,start,text_len); + + x = start; + tx = PyString_AS_STRING(text) + x; + setstr = (unsigned char *)PyString_AS_STRING(set); + + for (;x < text_len; tx++, x++) + if (Py_CharInSet(*tx,setstr)) + break; + + if (x == text_len) + /* Not found */ + return PyInt_FromLong(-1L); + else + return PyInt_FromLong(x); + + onError: + return NULL; +} + +Py_C_Function( mxTextTools_setstrip, + "setstrip(text,set,start=0,stop=len(text),mode=0)\n\n" + "Strip all characters in text[start:stop] appearing in set.\n" + "mode indicates where to strip (<0: left; =0: left and right;\n" + ">0: right). set must be a string obtained with set()\n" + "DEPRECATED: use CharSet().strip() instead." + ) +{ + char *tx; + Py_ssize_t tx_len; + char *setstr; + Py_ssize_t setstr_len; + Py_ssize_t start = 0; + Py_ssize_t stop = INT_MAX; + int mode = 0; + + Py_Get7Args("s#s#|iii:setstip", + tx,tx_len,setstr,setstr_len,start,stop,mode); + + return mxTextTools_SetStrip(tx, tx_len, + setstr, setstr_len, + start, stop, + mode); + + onError: + return NULL; +} + +Py_C_Function( mxTextTools_setsplit, + "setsplit(text,set,start=0,stop=len(text))\n\n" + "Split text[start:stop] into substrings using set,\n" + "omitting the splitting parts and empty substrings.\n" + "set must be a string obtained from set()\n" + "DEPRECATED: use CharSet().split() instead." 
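+       /* The set argument of setfind()/setstrip()/setsplit()/setsplitx()
+          is the 32-byte bit string built by set(): one bit per 8-bit
+          character, tested as
+
+              bitmap[ord(c) >> 3] & (1 << (ord(c) & 7))
+
+          Sketch of intended usage (not additional API):
+
+              whitespace = set(' \t\r\n')
+              setsplit('a  b\tc', whitespace)  -> ['a', 'b', 'c']
+              setsplitx('a  b', set(' '))      -> ['a', '  ', 'b']
+       */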
+ ) +{ + char *tx; + Py_ssize_t tx_len; + char *setstr; + Py_ssize_t setstr_len; + Py_ssize_t start = 0; + Py_ssize_t stop = INT_MAX; + + Py_Get6Args("s#s#|ii:setsplit", + tx,tx_len,setstr,setstr_len,start,stop); + + return mxTextTools_SetSplit(tx, tx_len, + setstr, setstr_len, + start, stop); + onError: + return NULL; +} + +Py_C_Function( mxTextTools_setsplitx, + "setsplitx(text,set,start=0,stop=len(text))\n\n" + "Split text[start:stop] into substrings using set, so\n" + "that every second entry consists only of characters in set.\n" + "set must be a string obtained with set()\n" + "DEPRECATED: use CharSet().splitx() instead." + ) +{ + Py_ssize_t text_len = INT_MAX; + Py_ssize_t start = 0; + char *tx; + Py_ssize_t tx_len; + char *setstr; + Py_ssize_t setstr_len; + + Py_Get6Args("s#s#|ii:setsplitx", + tx,tx_len,setstr,setstr_len,start,text_len); + + return mxTextTools_SetSplitX(tx, tx_len, + setstr, setstr_len, + start, text_len); + onError: + return NULL; +} + +Py_C_Function( mxTextTools_upper, + "upper(text)\n\n" + "Return text converted to upper case.") +{ + PyObject *text; + + Py_GetArgObject(text); + if (PyString_Check(text)) + return mxTextTools_Upper(text); +#ifdef HAVE_UNICODE + else if (PyUnicode_Check(text)) + return mxTextTools_UnicodeUpper(text); +#endif + else + Py_Error(PyExc_TypeError, + "expected string or unicode"); + + onError: + return NULL; +} + +Py_C_Function( mxTextTools_lower, + "lower(text)\n\n" + "Return text converted to lower case.") +{ + PyObject *text; + + Py_GetArgObject(text); + if (PyString_Check(text)) + return mxTextTools_Lower(text); +#ifdef HAVE_UNICODE + else if (PyUnicode_Check(text)) + return mxTextTools_UnicodeLower(text); +#endif + else + Py_Error(PyExc_TypeError, + "expected string or unicode"); + + onError: + return NULL; +} + +Py_C_Function( mxTextTools_str2hex, + "str2hex(text)\n\n" + "Return text converted to a string consisting of two byte\n" + "HEX values.") +{ + char *str; + Py_ssize_t len; + + Py_Get2Args("s#",str,len); + + return mxTextTools_HexStringFromString(str,len); + + onError: + return NULL; +} + +Py_C_Function( mxTextTools_hex2str, + "hex2str(text)\n\n" + "Return text interpreted as two byte HEX values converted\n" + "to a string.") +{ + char *str; + Py_ssize_t len; + + Py_Get2Args("s#",str,len); + + return mxTextTools_StringFromHexString(str,len); + + onError: + return NULL; +} + +Py_C_Function( mxTextTools_isascii, + "isascii(text,start=0,stop=len(text))\n\n" + "Return 1/0 depending on whether text only contains ASCII\n" + "characters." 
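+       /* Examples (per the docstrings above; a sketch, not extra API):
+
+              str2hex('AB')   -> '4142'
+              hex2str('4142') -> 'AB'
+              isascii('abc')  -> 1
+              isascii('\xe4') -> 0
+       */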
+ ) +{ + PyObject *text; + Py_ssize_t start=0, stop = INT_MAX; + int rc; + + Py_GetArgObject(text); + rc = mxTextTools_IsASCII(text, start, stop); + if (rc < 0) + goto onError; + return PyInt_FromLong(rc); + + onError: + return NULL; +} + +/* --- module init --------------------------------------------------------- */ + +/* Python Method Table */ + +static PyMethodDef Module_methods[] = +{ + Py_MethodWithKeywordsListEntry("tag",mxTextTools_tag), + Py_MethodListEntry("join",mxTextTools_join), + Py_MethodListEntry("cmp",mxTextTools_cmp), + Py_MethodListEntry("joinlist",mxTextTools_joinlist), + Py_MethodListEntry("set",mxTextTools_set), + Py_MethodListEntry("setfind",mxTextTools_setfind), + Py_MethodListEntry("setsplit",mxTextTools_setsplit), + Py_MethodListEntry("setsplitx",mxTextTools_setsplitx), + Py_MethodListEntry("setstrip",mxTextTools_setstrip), + Py_MethodWithKeywordsListEntry("TextSearch",mxTextSearch_TextSearch), + Py_MethodListEntry("CharSet",mxCharSet_CharSet), + Py_MethodListEntry("TagTable",mxTagTable_TagTable), +#ifdef HAVE_UNICODE + Py_MethodListEntry("UnicodeTagTable",mxTagTable_UnicodeTagTable), +#endif + Py_MethodListEntrySingleArg("upper",mxTextTools_upper), + Py_MethodListEntrySingleArg("lower",mxTextTools_lower), + Py_MethodListEntry("charsplit",mxTextTools_charsplit), + Py_MethodListEntry("splitat",mxTextTools_splitat), + Py_MethodListEntry("suffix",mxTextTools_suffix), + Py_MethodListEntry("prefix",mxTextTools_prefix), + Py_MethodListEntry("hex2str",mxTextTools_hex2str), + Py_MethodListEntry("str2hex",mxTextTools_str2hex), + Py_MethodListEntrySingleArg("isascii",mxTextTools_isascii), + {NULL,NULL} /* end of list */ +}; + +/* Cleanup function */ +static +void mxTextToolsModule_Cleanup(void) +{ + mxTextTools_TagTables = NULL; + + /* Reset mxTextTools_Initialized flag */ + mxTextTools_Initialized = 0; +} + +#if PY_MAJOR_VERSION >= 3 +static struct PyModuleDef mxTextTools_ModuleDef = { + PyModuleDef_HEAD_INIT, + MXTEXTTOOLS_MODULE, + Module_docstring, + -1, + Module_methods +}; +#endif + +static PyObject* mxTextToolsModule_Initialize(void) +{ + PyObject *module; + + if (mxTextTools_Initialized) { + PyErr_SetString(PyExc_SystemError, + "can't initialize "MXTEXTTOOLS_MODULE" more than once"); + return NULL; + } + + /* Init type objects */ + if (PyType_Ready(&mxTextSearch_Type) < 0) + return NULL; + if (PyType_Ready(&mxCharSet_Type) < 0) + return NULL; + if (PyType_Ready(&mxTagTable_Type) < 0) + return NULL; + + /* create module */ +#if PY_MAJOR_VERSION >= 3 + module = PyModule_Create(&mxTextTools_ModuleDef); +#else + module = Py_InitModule4(MXTEXTTOOLS_MODULE, /* Module name */ + Module_methods, /* Method list */ + Module_docstring, /* Module doc-string */ + (PyObject *)NULL, /* always pass this as *self */ + PYTHON_API_VERSION); /* API Version */ +#endif + if (!module) + return NULL; + + /* Init TagTable cache */ + mxTextTools_TagTables = PyDict_New(); + if (!mxTextTools_TagTables) + return NULL; + + /* Register cleanup function */ + if (Py_AtExit(mxTextToolsModule_Cleanup) < 0) + return NULL; + + /* Add some symbolic constants to the module */ + if (PyModule_AddStringConstant(module, "__version__", VERSION) < 0) + return NULL; + mx_ToUpper = mxTextTools_ToUpper(); + if (!mx_ToUpper) + return NULL; + if (PyModule_AddObject(module, "to_upper", mx_ToUpper) < 0) + return NULL; + mx_ToLower = mxTextTools_ToLower(); + if (!mx_ToLower) + return NULL; + if (PyModule_AddObject(module, "to_lower", mx_ToLower) < 0) + return NULL; + + /* Let the tag table cache live in the module 
dictionary; we just + keep a weak reference in mxTextTools_TagTables around. */ + if (PyModule_AddObject(module, "tagtable_cache", mxTextTools_TagTables) < 0) + return NULL; + Py_DECREF(mxTextTools_TagTables); + + ADD_INT_CONSTANT("BOYERMOORE", MXTEXTSEARCH_BOYERMOORE); + ADD_INT_CONSTANT("FASTSEARCH", MXTEXTSEARCH_FASTSEARCH); + ADD_INT_CONSTANT("TRIVIAL", MXTEXTSEARCH_TRIVIAL); + + /* Init exceptions */ + mxTextTools_Error = PyErr_NewException("mxTextTools.Error", PyExc_Exception, NULL); + if (!mxTextTools_Error) + return NULL; + if (PyModule_AddObject(module, "Error", mxTextTools_Error) < 0) + return NULL; + + /* Type objects */ + Py_INCREF(&mxTextSearch_Type); + if (PyModule_AddObject(module, "TextSearchType", (PyObject*) &mxTextSearch_Type) < 0) + return NULL; + Py_INCREF(&mxCharSet_Type); + if (PyModule_AddObject(module, "CharSetType", (PyObject*) &mxCharSet_Type) < 0) + return NULL; + Py_INCREF(&mxTagTable_Type); + if (PyModule_AddObject(module, "TagTableType", (PyObject*) &mxTagTable_Type) < 0) + return NULL; + + /* Tag Table command symbols (these will be exposed via + simpleparse.stt.TextTools.Constants.TagTables) */ + ADD_INT_CONSTANT("_const_AllIn", MATCH_ALLIN); + ADD_INT_CONSTANT("_const_AllNotIn", MATCH_ALLNOTIN); + ADD_INT_CONSTANT("_const_Is", MATCH_IS); + ADD_INT_CONSTANT("_const_IsIn", MATCH_ISIN); + ADD_INT_CONSTANT("_const_IsNot", MATCH_ISNOTIN); + ADD_INT_CONSTANT("_const_IsNotIn", MATCH_ISNOTIN); + + ADD_INT_CONSTANT("_const_Word", MATCH_WORD); + ADD_INT_CONSTANT("_const_WordStart", MATCH_WORDSTART); + ADD_INT_CONSTANT("_const_WordEnd", MATCH_WORDEND); + + ADD_INT_CONSTANT("_const_AllInSet", MATCH_ALLINSET); + ADD_INT_CONSTANT("_const_IsInSet", MATCH_ISINSET); + ADD_INT_CONSTANT("_const_AllInCharSet", MATCH_ALLINCHARSET); + ADD_INT_CONSTANT("_const_IsInCharSet", MATCH_ISINCHARSET); + + ADD_INT_CONSTANT("_const_Fail", MATCH_FAIL); + ADD_INT_CONSTANT("_const_Jump", MATCH_JUMP); + ADD_INT_CONSTANT("_const_EOF", MATCH_EOF); + ADD_INT_CONSTANT("_const_Skip", MATCH_SKIP); + ADD_INT_CONSTANT("_const_Move", MATCH_MOVE); + + ADD_INT_CONSTANT("_const_JumpTarget", MATCH_JUMPTARGET); + + ADD_INT_CONSTANT("_const_sWordStart", MATCH_SWORDSTART); + ADD_INT_CONSTANT("_const_sWordEnd", MATCH_SWORDEND); + ADD_INT_CONSTANT("_const_sFindWord", MATCH_SFINDWORD); + ADD_INT_CONSTANT("_const_NoWord", MATCH_NOWORD); + + ADD_INT_CONSTANT("_const_Call", MATCH_CALL); + ADD_INT_CONSTANT("_const_CallArg", MATCH_CALLARG); + + ADD_INT_CONSTANT("_const_Table", MATCH_TABLE); + ADD_INT_CONSTANT("_const_SubTable", MATCH_SUBTABLE); + ADD_INT_CONSTANT("_const_TableInList", MATCH_TABLEINLIST); + ADD_INT_CONSTANT("_const_SubTableInList", MATCH_SUBTABLEINLIST); + + ADD_INT_CONSTANT("_const_Loop", MATCH_LOOP); + ADD_INT_CONSTANT("_const_LoopControl", MATCH_LOOPCONTROL); + + /* Tag Table command flags */ + ADD_INT_CONSTANT("_const_CallTag", MATCH_CALLTAG); + ADD_INT_CONSTANT("_const_AppendToTagobj", MATCH_APPENDTAG); + ADD_INT_CONSTANT("_const_AppendTagobj", MATCH_APPENDTAGOBJ); + ADD_INT_CONSTANT("_const_AppendMatch", MATCH_APPENDMATCH); + ADD_INT_CONSTANT("_const_LookAhead", MATCH_LOOKAHEAD); + + /* Tag Table argument integers */ + ADD_INT_CONSTANT("_const_To", MATCH_JUMP_TO); + ADD_INT_CONSTANT("_const_MatchOk", MATCH_JUMP_MATCHOK); + ADD_INT_CONSTANT("_const_MatchFail", MATCH_JUMP_MATCHFAIL); + ADD_INT_CONSTANT("_const_ToEOF", MATCH_MOVE_EOF); + ADD_INT_CONSTANT("_const_ToBOF", MATCH_MOVE_BOF); + ADD_INT_CONSTANT("_const_Here", MATCH_FAIL_HERE); + + ADD_INT_CONSTANT("_const_ThisTable", MATCH_THISTABLE); + 
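+    /* The command, flag and argument constants registered here form the
+       vocabulary of tag-table entries.  As a sketch (using the names
+       re-exported via Constants.TagTables, see above), an entry is
+       roughly a tuple
+
+           (tagobj, command|flags, argument[, jump_no_match[, jump_match]])
+
+       e.g., with a2z being the lowercase a-z character string from
+       TextTools:
+
+           table = (('word', AllIn, a2z, MatchFail, MatchOk),)
+           tag('hello', table)  ->  (1, [('word', 0, 5, None)], 5)
+    */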
+ ADD_INT_CONSTANT("_const_Break", MATCH_LOOPCONTROL_BREAK); + ADD_INT_CONSTANT("_const_Reset", MATCH_LOOPCONTROL_RESET); + + DPRINTF("sizeof(string_charset)=%i bytes\n", sizeof(string_charset)); +#ifdef HAVE_UNICODE + DPRINTF("sizeof(unicode_charset)=%i bytes\n", sizeof(unicode_charset)); +#endif + + /* We are now initialized */ + mxTextTools_Initialized = 1; + + return module; +} + +#if PY_MAJOR_VERSION >= 3 +PyMODINIT_FUNC PyInit_mxTextTools(void) +{ + return mxTextToolsModule_Initialize(); +} +#else +MX_EXPORT(void) initmxTextTools(void) +{ + mxTextToolsModule_Initialize(); +} +#endif + diff -Nru simpleparse-2.1.0a1/simpleparse/stt/TextTools/mxTextTools/mxTextTools.c.~1~ simpleparse-2.2.0/simpleparse/stt/TextTools/mxTextTools/mxTextTools.c.~1~ --- simpleparse-2.1.0a1/simpleparse/stt/TextTools/mxTextTools/mxTextTools.c.~1~ 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/stt/TextTools/mxTextTools/mxTextTools.c.~1~ 2015-07-23 14:49:22.000000000 +0000 @@ -0,0 +1,5321 @@ +/* + mxTextTools -- Fast text manipulation routines + + Copyright (c) 2000, Marc-Andre Lemburg; mailto:mal@lemburg.com + Copyright (c) 2000-2002, eGenix.com Software GmbH; mailto:info@egenix.com +*/ + +/* We want all our symbols to be exported */ +#define MX_BUILDING_MXTEXTTOOLS + +/* Logging file used by debugging facility */ +#ifndef MAL_DEBUG_OUTPUTFILE +# define MAL_DEBUG_OUTPUTFILE "mxTextTools.log" +#endif + +#include "mx.h" +#include "mxTextTools.h" +#include + +#define VERSION "2.1.0" + +/* Initial list size used by e.g. setsplit(), setsplitx(),... */ +#define INITIAL_LIST_SIZE 64 + +/* Maximum TagTable cache size. If this limit is reached, the cache + is cleared to make room for new compile TagTables. */ +#define MAX_TAGTABLES_CACHE_SIZE 100 + +/* Define this to enable the copy-protocol (__copy__, __deepcopy__) */ +#define COPY_PROTOCOL + +/* --- module doc-string -------------------------------------------------- */ + +static char *Module_docstring = + + MXTEXTTOOLS_MODULE" -- Tools for fast text processing. Version "VERSION"\n\n" + + "Copyright (c) 1997-2000, Marc-Andre Lemburg; mailto:mal@lemburg.com\n" + "Copyright (c) 2000-2002, eGenix.com Software GmbH; mailto:info@egenix.com\n\n" + "Copyright (c) 2003-2006, Mike Fletcher; mailto:mcfletch@vrplumber.com\n\n" + + " All Rights Reserved\n\n" + "See the documentation for further information on copyrights,\n" + "or contact the author." +; + +/* --- internal macros ---------------------------------------------------- */ + +/* --- module globals ----------------------------------------------------- */ + +/* Translation strings for the 8-bit versions of lower() and upper() */ +static PyObject *mx_ToUpper; +static PyObject *mx_ToLower; + +static PyObject *mxTextTools_Error; /* mxTextTools specific error */ + +static PyObject *mxTextTools_TagTables; /* TagTable cache dictionary */ + +/* Flag telling us whether the module was initialized or not. 
*/ +static int mxTextTools_Initialized = 0; + +/* --- forward declarations ----------------------------------------------- */ + +/* --- module helper ------------------------------------------------------ */ + +static +PyObject *mxTextTools_ToUpper(void) +{ + char tr[256]; + Py_ssize_t i; + + for (i = 0; i < 256; i++) + tr[i] = toupper((char)i); + return PyString_FromStringAndSize(tr,sizeof(tr)); +} + +static +PyObject *mxTextTools_ToLower(void) +{ + char tr[256]; + Py_ssize_t i; + + for (i = 0; i < 256; i++) + tr[i] = tolower((char)i); + return PyString_FromStringAndSize(tr,sizeof(tr)); +} + +/* Create an exception object, insert it into the module dictionary + under the given name and return the object pointer; this is NULL in + case an error occurred. base can be given to indicate the base + object to be used by the exception object. It should be NULL + otherwise */ + +static +PyObject *insexc(PyObject *moddict, + char *name, + PyObject *base) +{ + PyObject *v; + char fullname[256]; + char *modname; + char *dot; + + v = PyDict_GetItemString(moddict, "__name__"); + if (v == NULL) + modname = NULL; + else + modname = PyString_AsString(v); + if (modname == NULL) { + PyErr_Clear(); + modname = MXTEXTTOOLS_MODULE; + } + /* The symbols from this extension are imported into + simpleparse.stt.TextTools. We trim the name to not confuse the user with an + overly long package path. */ + strcpy(fullname, modname); + dot = strchr(fullname, '.'); + if (dot) + dot = strchr(dot+1, '.'); + if (dot) + strcpy(dot+1, name); + else + sprintf(fullname, "%s.%s", modname, name); + + v = PyErr_NewException(fullname, base, NULL); + if (v == NULL) + return NULL; + if (PyDict_SetItemString(moddict,name,v)) + return NULL; + return v; +} + +/* Helper for adding integer constants to a dictionary. 
Check for + errors with PyErr_Occurred() */ +static +void insint(PyObject *dict, + char *name, + int value) +{ + PyObject *v = PyInt_FromLong((long)value); + PyDict_SetItemString(dict, name, v); + Py_XDECREF(v); +} + +/* --- module interface --------------------------------------------------- */ + +/* --- Text Search Object ----------------------------------------------*/ + +staticforward PyMethodDef mxTextSearch_Methods[]; + +/* allocation */ + +static +PyObject *mxTextSearch_New(PyObject *match, + PyObject *translate, + int algorithm) +{ + mxTextSearchObject *so; + + so = PyObject_NEW(mxTextSearchObject, &mxTextSearch_Type); + if (so == NULL) + return NULL; + so->data = NULL; + so->translate = NULL; + so->match = NULL; + + Py_INCREF(match); + so->match = match; + + if (translate == Py_None) + translate = NULL; + else if (translate) { + Py_Assert(PyString_Check(translate), + PyExc_TypeError, + "translate table must be a string"); + Py_Assert(PyString_GET_SIZE(translate) == 256, + PyExc_TypeError, + "translate string must have exactly 256 chars"); + Py_INCREF(translate); + } + so->translate = translate; + + /* Init algorithm */ + so->algorithm = algorithm; + switch (algorithm) { + + case MXTEXTSEARCH_BOYERMOORE: + Py_Assert(PyString_Check(match), + PyExc_TypeError, + "match must be a string for Boyer-Moore"); + so->data = bm_init(PyString_AS_STRING(match), + PyString_GET_SIZE(match)); + Py_Assert(so->data != NULL, + PyExc_TypeError, + "error initializing the search object"); + break; + +#ifdef MXFASTSEARCH + case MXTEXTSEARCH_FASTSEARCH: + Py_Assert(PyString_Check(match), + PyExc_TypeError, + "match must be a string for FastSearch"); + so->data = fs_init(PyString_AS_STRING(match), + PyString_GET_SIZE(match)); + Py_Assert(so->data != NULL, + PyExc_TypeError, + "error initializing the search object"); + break; +#endif + + case MXTEXTSEARCH_TRIVIAL: + Py_Assert(PyString_Check(match) || PyUnicode_Check(match), + PyExc_TypeError, + "match must be a string or unicode"); + Py_Assert(so->translate == NULL, + PyExc_TypeError, + "trivial search algorithm does not support translate"); + break; + + default: + Py_Error(PyExc_ValueError, + "unknown or unsupported algorithm"); + + } + return (PyObject *)so; + + onError: + Py_DECREF(so); + return NULL; +} + +Py_C_Function_WithKeywords( + mxTextSearch_TextSearch, + "TextSearch(match[,translate=None,algorithm=default_algorithm])\n\n" + "Create a substring search object for the string match;\n" + "translate is an optional translate-string like the one used\n" + "in the module re." + ) +{ + PyObject *match = 0; + PyObject *translate = 0; + int algorithm = -424242; + + Py_KeywordsGet3Args("O|Oi:TextSearch",match,translate,algorithm); + + if (algorithm == -424242) { + if (PyUnicode_Check(match)) + algorithm = MXTEXTSEARCH_TRIVIAL; + else +#ifdef MXFASTSEARCH + algorithm = MXTEXTSEARCH_BOYERMOORE; +#else + algorithm = MXTEXTSEARCH_BOYERMOORE; +#endif + } + return mxTextSearch_New(match, translate, algorithm); + + onError: + return NULL; +} + +static +void mxTextSearch_Free(mxTextSearchObject *so) +{ + if (so->data) { + switch (so->algorithm) { + + case MXTEXTSEARCH_BOYERMOORE: + bm_free(so->data); + break; + +#ifdef MXFASTSEARCH + case MXTEXTSEARCH_FASTSEARCH: + fs_free(so->data); + break; +#endif + case MXTEXTSEARCH_TRIVIAL: + break; + + } + } + Py_XDECREF(so->match); + Py_XDECREF(so->translate); + PyObject_Del(so); +} + +/* C APIs */ + +#define so ((mxTextSearchObject *)self) + +/* Get the match length from an TextSearch object or -1 in case of an + error. 
*/ + +Py_ssize_t mxTextSearch_MatchLength(PyObject *self) +{ + Py_Assert(mxTextSearch_Check(self), + PyExc_TypeError, + "expected a TextSearch object"); + + switch (so->algorithm) { + + case MXTEXTSEARCH_BOYERMOORE: + return BM_MATCH_LEN(so->data); + break; + +#ifdef MXFASTSEARCH + case MXTEXTSEARCH_FASTSEARCH: + return FS_MATCH_LEN(so->data); + break; +#endif + + case MXTEXTSEARCH_TRIVIAL: + if (PyString_Check(so->match)) + return PyString_GET_SIZE(so->match); +#ifdef HAVE_UNICODE + else if (PyUnicode_Check(so->match)) + return PyUnicode_GET_SIZE(so->match); +#endif + break; + + } + + Py_Error(mxTextTools_Error, + "internal error"); + + onError: + return -1; +} + +static +Py_ssize_t trivial_search(const char *text, + Py_ssize_t start, + Py_ssize_t stop, + const char *match, + Py_ssize_t match_len) +{ + Py_ssize_t ml1 = match_len - 1; + register const char *tx = &text[start]; + register Py_ssize_t x = start; + + if (ml1 < 0) + return start; + + /* Brute-force method; from right to left */ + for (;;) { + register Py_ssize_t j = ml1; + register const char *mj = &match[j]; + + if (x + j >= stop) + /* reached eof: no match */ + return start; + + /* scan from right to left */ + for (tx += j; j >= 0 && *tx == *mj; + tx--, mj--, j--) ; + + if (j < 0) { + /* found */ + x += ml1 + 1; + return x; + } + /* not found: rewind and advance one char */ + tx -= j - 1; + x++; + } + return start; +} + +#ifdef HAVE_UNICODE +static +Py_ssize_t trivial_unicode_search(const Py_UNICODE *text, + Py_ssize_t start, + Py_ssize_t stop, + const Py_UNICODE *match, + Py_ssize_t match_len) +{ + Py_ssize_t ml1 = match_len - 1; + register const Py_UNICODE *tx = &text[start]; + register Py_ssize_t x = start; + + if (ml1 < 0) + return start; + + /* Brute-force method; from right to left */ + for (;;) { + register Py_ssize_t j = ml1; + register const Py_UNICODE *mj = &match[j]; + + if (x + j >= stop) + /* reached eof: no match */ + return start; + + /* scan from right to left */ + for (tx += j; j >= 0 && *tx == *mj; + tx--, mj--, j--) ; + + if (j < 0) { + /* found */ + x += ml1 + 1; + return x; + } + /* not found: rewind and advance one char */ + tx -= j - 1; + x++; + } + return start; +} +#endif + +/* Search for the match in text[start:stop]. + + Returns 1 in case a match was found and sets sliceleft, sliceright + to the matching slice. + + Returns 0 in case no match was found and -1 in case of an error. 
+ +*/ + +Py_ssize_t mxTextSearch_SearchBuffer(PyObject *self, + char *text, + Py_ssize_t start, + Py_ssize_t stop, + Py_ssize_t *sliceleft, + Py_ssize_t *sliceright) +{ + Py_ssize_t nextpos; + Py_ssize_t match_len; + + Py_Assert(mxTextSearch_Check(self), + PyExc_TypeError, + "expected a TextSearch object"); + + switch (so->algorithm) { + + case MXTEXTSEARCH_BOYERMOORE: + if (so->translate) { + /* search with translate table */ + nextpos = bm_tr_search((mxbmse_data *)so->data, + text, + start, + stop, + PyString_AS_STRING(so->translate)); + } + else { + /* exact search */ + nextpos = bm_search((mxbmse_data *)so->data, + text, + start, + stop); + } + match_len = BM_MATCH_LEN(so->data); + break; + +#ifdef MXFASTSEARCH + case MXTEXTSEARCH_FASTSEARCH: + if (so->translate) { + /* search with translate table */ + nextpos = fs_tr_search((mxfse_data *)so->data, + text, + start, + stop, + PyString_AS_STRING(so->translate)); + } + else { + /* exact search */ + nextpos = fs_search((mxfse_data *)so->data, + text, + start, + stop); + } + match_len = FS_MATCH_LEN(so->data); + break; +#endif + + case MXTEXTSEARCH_TRIVIAL: + { + const char *match; + + if (PyString_Check(so->match)) { + match = PyString_AS_STRING(so->match); + match_len = PyString_GET_SIZE(so->match); + } + else if (PyObject_AsCharBuffer(so->match, &match, &match_len)) + goto onError; + nextpos = trivial_search(text, + start, + stop, + match, + match_len); + } + break; + + default: + Py_Error(mxTextTools_Error, + "unknown algorithm type in mxTextSearch_SearchBuffer"); + + } + /* Found ? */ + if (nextpos != start) { + if (sliceleft) + *sliceleft = nextpos - match_len; + if (sliceright) + *sliceright = nextpos; + return 1; + } + /* Not found */ + return 0; + + onError: + return -1; +} + +#ifdef HAVE_UNICODE +Py_ssize_t mxTextSearch_SearchUnicode(PyObject *self, + Py_UNICODE *text, + Py_ssize_t start, + Py_ssize_t stop, + Py_ssize_t *sliceleft, + Py_ssize_t *sliceright) +{ + Py_ssize_t nextpos; + Py_ssize_t match_len; + + Py_Assert(mxTextSearch_Check(self), + PyExc_TypeError, + "expected a TextSearch object"); + + switch (so->algorithm) { + + case MXTEXTSEARCH_BOYERMOORE: + Py_Error(PyExc_TypeError, + "Boyer-Moore search algorithm does not support Unicode"); + break; + +#ifdef MXFASTSEARCH + case MXTEXTSEARCH_FASTSEARCH: + Py_Error(PyExc_TypeError, + "FastSearch search algorithm does not support Unicode"); +#endif + + case MXTEXTSEARCH_TRIVIAL: + { + PyObject *u; + Py_UNICODE *match; + + if (PyUnicode_Check(so->match)) { + u = NULL; + match = PyUnicode_AS_UNICODE(so->match); + match_len = PyUnicode_GET_SIZE(so->match); + } + else { + u = PyUnicode_FromEncodedObject(so->match, NULL, NULL); + if (u == NULL) + goto onError; + match = PyUnicode_AS_UNICODE(u); + match_len = PyUnicode_GET_SIZE(u); + } + nextpos = trivial_unicode_search(text, + start, + stop, + match, + match_len); + Py_XDECREF(u); + } + break; + + default: + Py_Error(mxTextTools_Error, + "unknown algorithm type in mxTextSearch_SearchUnicode"); + + } + /* Found ? 
*/ + if (nextpos != start) { + if (sliceleft) + *sliceleft = nextpos - match_len; + if (sliceright) + *sliceright = nextpos; + return 1; + } + /* Not found */ + return 0; + + onError: + return -1; +} +#endif + +/* methods */ + +Py_C_Function( mxTextSearch_search, + "TextSearch.search(text,start=0,stop=len(text))\n\n" + "Search for the substring in text, looking only at the\n" + "slice [start:stop] and return the slice (l,r)\n" + "where the substring was found, (start,start) otherwise.") +{ + Py_Assert(PySequence_Check(args),PyExc_TypeError,"Arguments are not a tuple?"); + PyObject *text; + Py_ssize_t start = 0; + Py_ssize_t stop = INT_MAX; + Py_ssize_t sliceleft, sliceright; + int rc; + + Py_Get3Args("O|ii:TextSearch.search", + text,start,stop); + + if (PyString_Check(text)) { + Py_CheckStringSlice(text, start, stop); + rc = mxTextSearch_SearchBuffer(self, + PyString_AS_STRING(text), + start, + stop, + &sliceleft, + &sliceright); + } +#ifdef HAVE_UNICODE + else if (PyUnicode_Check(text)) { + Py_CheckUnicodeSlice(text, start, stop); + rc = mxTextSearch_SearchUnicode(self, + PyUnicode_AS_UNICODE(text), + start, + stop, + &sliceleft, + &sliceright); + } +#endif + else + Py_Error(PyExc_TypeError, + "expected string or unicode"); + if (rc < 0) + goto onError; + if (rc == 0) { + sliceleft = start; + sliceright = start; + } + + /* Return the slice */ + Py_Return2("ii", sliceleft, sliceright); + + onError: + return NULL; +} + +Py_C_Function( mxTextSearch_find, + "TextSearch.find(text,start=0,stop=len(text))\n\n" + "Search for the substring in text, looking only at the\n" + "slice [start:stop] and return the index\n" + "where the substring was found, -1 otherwise.") +{ + Py_Assert(PySequence_Check(args),PyExc_TypeError,"Arguments are not a tuple?"); + PyObject *text; + Py_ssize_t start = 0; + Py_ssize_t stop = INT_MAX; + Py_ssize_t sliceleft, sliceright; + int rc; + + Py_Get3Args("O|ii:TextSearch.find", + text,start,stop); + + if (PyString_Check(text)) { + Py_CheckStringSlice(text, start, stop); + rc = mxTextSearch_SearchBuffer(self, + PyString_AS_STRING(text), + start, + stop, + &sliceleft, + &sliceright); + } +#ifdef HAVE_UNICODE + else if (PyUnicode_Check(text)) { + Py_CheckUnicodeSlice(text, start, stop); + rc = mxTextSearch_SearchUnicode(self, + PyUnicode_AS_UNICODE(text), + start, + stop, + &sliceleft, + &sliceright); + } +#endif + else + Py_Error(PyExc_TypeError, + "expected string or unicode"); + if (rc < 0) + goto onError; + if (rc == 0) + sliceleft = -1; + return PyInt_FromLong(sliceleft); + + onError: + return NULL; +} + +Py_C_Function( mxTextSearch_findall, + "TextSearch.findall(text,start=0,stop=len(text))\n\n" + "Search for the substring in text, looking only at the\n" + "slice [start:stop] and return a list of all\n" + "non overlapping slices (l,r) in text where the match\n" + "string can be found.") +{ + Py_Assert(PySequence_Check(args),PyExc_TypeError,"Arguments are not a tuple?"); + PyObject *text; + PyObject *list = 0; + Py_ssize_t start = 0; + Py_ssize_t stop = INT_MAX; + Py_ssize_t stop_index; + Py_ssize_t match_len; + Py_ssize_t listsize = INITIAL_LIST_SIZE; + Py_ssize_t listitem = 0; + + Py_Get3Args("O|ii:TextSearch.findall", + text,start,stop); + + if (PyString_Check(text)) { + Py_CheckStringSlice(text, start, stop); + } +#ifdef HAVE_UNICODE + else if (PyUnicode_Check(text)) { + Py_CheckUnicodeSlice(text, start, stop); + } +#endif + else + Py_Error(PyExc_TypeError, + "expected string or unicode"); + + list = PyList_New(listsize); + if (!list) + goto onError; + + match_len = 
mxTextSearch_MatchLength(self); + if (match_len < 0) + goto onError; + stop_index = stop - match_len; + + while (start <= stop_index) { + register PyObject *t,*v; + int rc; + Py_ssize_t sliceleft, sliceright; + + /* exact search */ + if (PyString_Check(text)) + rc = mxTextSearch_SearchBuffer(self, + PyString_AS_STRING(text), + start, + stop, + &sliceleft, + &sliceright); +#ifdef HAVE_UNICODE + else if (PyUnicode_Check(text)) + rc = mxTextSearch_SearchUnicode(self, + PyUnicode_AS_UNICODE(text), + start, + stop, + &sliceleft, + &sliceright); +#endif + else + break; + if (rc < 0) + goto onError; + if (rc == 0) + break; + + /* Build slice and append to list */ + t = PyTuple_New(2); + if (!t) + goto onError; + v = PyInt_FromLong(sliceleft); + if (!v) + goto onError; + PyTuple_SET_ITEM(t,0,v); + v = PyInt_FromLong(sliceright); + if (!v) + goto onError; + PyTuple_SET_ITEM(t,1,v); + + if (listitem < listsize) + PyList_SET_ITEM(list, listitem, t); + else { + PyList_Append(list, t); + Py_DECREF(t); + } + listitem++; + + start = sliceright; + } + + /* Resize list if necessary */ + if (listitem < listsize) + PyList_SetSlice(list, listitem, listsize, (PyObject*)NULL); + + return list; + + onError: + Py_XDECREF(list); + return NULL; +} + +#ifdef COPY_PROTOCOL +Py_C_Function( mxTextSearch_copy, + "copy([memo])\n\n" + "Return a new reference for the instance. This function\n" + "is used for the copy-protocol. Real copying doesn't take\n" + "place, since the instances are immutable.") +{ + Py_Assert(PySequence_Check(args),PyExc_TypeError,"Arguments are not a tuple?"); + PyObject *memo; + + Py_GetArg("|O",memo); + Py_INCREF(so); + return (PyObject *)so; + onError: + return NULL; +} +#endif + +#undef so + +/* --- slots --- */ + +static +PyObject *mxTextSearch_Repr(mxTextSearchObject *self) +{ + char *algoname; + PyObject *v; + char t[500], *reprstr; + + v = PyObject_Repr(self->match); + if (v == NULL) + return NULL; + reprstr = PyString_AsString(v); + if (reprstr == NULL) + return NULL; + + switch (self->algorithm) { + case MXTEXTSEARCH_BOYERMOORE: + algoname = "Boyer-Moore"; + break; +#ifdef MXFASTSEARCH + case MXTEXTSEARCH_FASTSEARCH: + algoname = "FastSearch"; + break; +#endif + case MXTEXTSEARCH_TRIVIAL: + algoname = "Trivial"; + break; + default: + algoname = ""; + } + + sprintf(t, "<%.50s TextSearch object for %.400s at 0x%lx>", + algoname, reprstr, (long)self); + Py_DECREF(v); + return PyString_FromString(t); +} + +static +PyObject *mxTextSearch_GetAttr(mxTextSearchObject *self, + char *name) +{ + PyObject *v; + + if (Py_WantAttr(name,"match")) { + v = self->match; + Py_INCREF(v); + return v; + } + else if (Py_WantAttr(name,"translate")) { + v = self->translate; + if (v == NULL) + v = Py_None; + Py_INCREF(v); + return v; + } + else if (Py_WantAttr(name,"algorithm")) + return PyInt_FromLong(self->algorithm); + else if (Py_WantAttr(name,"__members__")) + return Py_BuildValue("[sss]", + "match", "translate", "algorithm"); + + return Py_FindMethod(mxTextSearch_Methods, (PyObject *)self, (char *)name); +} + +/* Python Type Table */ + +PyTypeObject mxTextSearch_Type = { + PyObject_HEAD_INIT(0) /* init at startup ! 
*/ + 0, /*ob_size*/ + "TextSearch", /*tp_name*/ + sizeof(mxTextSearchObject), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + /* methods */ + (destructor)mxTextSearch_Free, /*tp_dealloc*/ + (printfunc)0, /*tp_print*/ + (getattrfunc)mxTextSearch_GetAttr, /*tp_getattr*/ + (setattrfunc)0, /*tp_setattr*/ + (cmpfunc)0, /*tp_compare*/ + (reprfunc)mxTextSearch_Repr, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_number*/ + 0, /*tp_as_mapping*/ + (hashfunc)0, /*tp_hash*/ + (ternaryfunc)0, /*tp_call*/ + (reprfunc)0, /*tp_str*/ + (getattrofunc)0, /*tp_getattro*/ + (setattrofunc)0, /*tp_setattro*/ +}; + +/* Python Method Table */ + +statichere +PyMethodDef mxTextSearch_Methods[] = +{ + Py_MethodListEntry("search",mxTextSearch_search), + Py_MethodListEntry("find",mxTextSearch_find), + Py_MethodListEntry("findall",mxTextSearch_findall), +#ifdef COPY_PROTOCOL + Py_MethodListEntry("__deepcopy__",mxTextSearch_copy), + Py_MethodListEntry("__copy__",mxTextSearch_copy), +#endif + {NULL,NULL} /* end of list */ +}; + +/* --- Character Set Object --------------------------------------------*/ + +staticforward PyMethodDef mxCharSet_Methods[]; + +/* internal */ + +/* 8-bit character sets are implemented using a simple 32-byte + long bitmap with one bit per character. + + Addressing is done as follows: + + def char_is_set(ordinal): + return bitmap[ordinal >> 3] & (1 << (ordinal & 7)) + +*/ + +#define STRING_CHARSET_SIZE 256 +#define STRING_CHARSET_BITMAP_SIZE (STRING_CHARSET_SIZE / 8) + +typedef struct { + unsigned char bitmap[STRING_CHARSET_BITMAP_SIZE]; + /* character bitmap */ +} string_charset; + +static +int init_string_charset(mxCharSetObject *cs, + PyObject *definition) +{ + register Py_ssize_t i, j; + char *def = PyString_AS_STRING(definition); + const Py_ssize_t len = PyString_GET_SIZE(definition); + string_charset *lookup = 0; + register unsigned char *bitmap; + int logic = 1; + + /* Handle logic change (first char is '^' for negative matching) */ + if (len > 0 && def[0] == '^') { + logic = 0; + i = 1; + } + else + i = 0; + + /* Build 32-byte lookup bitmap (one bit per character) */ + lookup = (string_charset *)PyMem_Malloc(sizeof(string_charset)); + if (lookup == NULL) { + PyErr_NoMemory(); + goto onError; + } + memset(lookup, 0, sizeof(string_charset)); + cs->mode = MXCHARSET_8BITMODE; + cs->lookup = (void *)lookup; + bitmap = lookup->bitmap; + + for (; i < len; i++) { + + /* Handle escapes: "b\-d", "\\" */ + if (def[i] == '\\') { + if (i < len - 1 && def[i+1] == '\\') { + j = (unsigned char)'\\'; + bitmap[j >> 3] |= 1 << (j & 7); + i++; + } + continue; + } + + /* Handle ranges: "b-d", "\\-z", "\--z" */ + if (i < len - 2 && def[i+1] == '-') { + unsigned char range_left = def[i]; + unsigned char range_right = def[i+2]; + for (j = range_left; j <= range_right; j++) + bitmap[j >> 3] |= 1 << (j & 7); + i++; + continue; + } + + /* Normal processing */ + j = (unsigned char)def[i]; + bitmap[j >> 3] |= 1 << (j & 7); + } + + /* Invert bitmap if negative matching is requested */ + if (!logic) { + DPRINTF("init_string_charset: inverting bitmap\n"); + for (i = 0; i < STRING_CHARSET_BITMAP_SIZE; i++) + bitmap[i] ^= 0xFF; + } + + return 0; + + onError: + if (lookup) + PyMem_Free((void *)lookup); + cs->lookup = 0; + return -1; +} + +#ifdef HAVE_UNICODE + +/* Unicode character sets are implemented using two step indexing + which is a good compromise between lookup speed and memory usage. + + Lookup is done using a variable length array of 32-byte bitmap + blocks. There can be 256 such blocks. 
Identical blocks are + collapsed into a single copy. + + Addressing is done as follows: + + def char_is_set(ordinal): + index = bitmapindex[ordinal >> 8] + bitmap = bitmaps[index] + return bitmap[(ordinal >> 3) & 31] & (1 << (ordinal & 7)) + + The technique used here is very similar to what is done in Python's + SRE (see the BIGCHARSET patch by Martin von Loewis). Compression + should be reasonably good since character sets in practice usually + only contains a few single characters or longer ranges of Unicode + characters. + +*/ + +#define UNICODE_CHARSET_SIZE 65536 +#define UNICODE_CHARSET_BITMAP_SIZE 32 +#define UNICODE_CHARSET_BITMAPS (UNICODE_CHARSET_SIZE / (UNICODE_CHARSET_BITMAP_SIZE * 8)) +#define UNICODE_CHARSET_BIGMAP_SIZE (UNICODE_CHARSET_SIZE / 8) + +typedef struct { + unsigned char bitmapindex[UNICODE_CHARSET_BITMAPS]; + /* Index to char bitmaps */ + unsigned char bitmaps[UNICODE_CHARSET_BITMAPS][UNICODE_CHARSET_BITMAP_SIZE]; + /* Variable length bitmap array */ +} unicode_charset; + +static +int init_unicode_charset(mxCharSetObject *cs, + PyObject *definition) +{ + register Py_ssize_t i, j; + Py_UNICODE *def = PyUnicode_AS_UNICODE(definition); + const Py_ssize_t len = PyUnicode_GET_SIZE(definition); + unicode_charset *lookup = 0; + unsigned char bigmap[UNICODE_CHARSET_BIGMAP_SIZE]; + Py_ssize_t blocks; + int logic = 1; + + /* Handle logic change (first char is '^' for negative matching) */ + if (len > 0 && def[0] == '^') { + logic = 0; + i = 1; + } + else + i = 0; + + /* Build bigmap */ + memset(bigmap, 0, sizeof(bigmap)); + for (; i < len; i++) { + + /* Handle escapes: "b\-d", "\\" */ + if (def[i] == '\\') { + if (i < len - 1 && def[i+1] == '\\') { + j = (int)'\\'; + bigmap[j >> 3] |= 1 << (j & 7); + i++; + } + continue; + } + + /* Handle ranges: "b-d", "\\-z", "\--z" */ + if (i < len - 2 && def[i+1] == '-') { + Py_UNICODE range_left = def[i]; + Py_UNICODE range_right = def[i+2]; + if (range_right >= UNICODE_CHARSET_SIZE) { + Py_Error(PyExc_ValueError, + "unicode ordinal out of supported range"); + } + for (j = range_left; j <= range_right; j++) + bigmap[j >> 3] |= 1 << (j & 7); + i++; + continue; + } + + /* Normal processing */ + j = def[i]; + if (j >= UNICODE_CHARSET_SIZE) { + Py_Error(PyExc_ValueError, + "unicode ordinal out of supported range"); + } + bigmap[j >> 3] |= 1 << (j & 7); + } + + /* Build lookup table + + XXX Could add dynamic resizing here... probably not worth it + though, since sizeof(unicode_charset) isn't all that large. 
+ + */ + lookup = (unicode_charset *)PyMem_Malloc(sizeof(unicode_charset)); + if (lookup == NULL) { + PyErr_NoMemory(); + goto onError; + } + blocks = 0; + for (i = UNICODE_CHARSET_BITMAPS - 1; i >= 0; i--) { + unsigned char *block = &bigmap[i << 5]; + for (j = blocks - 1; j >= 0; j--) + if (memcmp(lookup->bitmaps[j], block, + UNICODE_CHARSET_BITMAP_SIZE) == 0) + break; + if (j < 0) { + j = blocks; + DPRINTF("init_unicode_charset: Creating new block %i for %i\n", + j, i); + memcpy(lookup->bitmaps[j], block, UNICODE_CHARSET_BITMAP_SIZE); + blocks++; + } + else + DPRINTF("init_unicode_charset: Reusing block %i for %i\n", j, i); + lookup->bitmapindex[i] = j; + } + DPRINTF("init_unicode_charset: Map size: %i block(s) = %i bytes\n", + blocks, UNICODE_CHARSET_BITMAPS + + blocks * UNICODE_CHARSET_BITMAP_SIZE); + lookup = (unicode_charset *)PyMem_Realloc(lookup, + UNICODE_CHARSET_BITMAPS + + blocks * UNICODE_CHARSET_BITMAP_SIZE); + if (lookup == NULL) { + PyErr_NoMemory(); + goto onError; + } + + /* Invert bitmaps if negative matching is requested */ + if (!logic) { + register unsigned char *bitmap = &lookup->bitmaps[0][0]; + DPRINTF("init_unicode_charset: inverting bitmaps\n"); + for (i = 0; i < blocks * UNICODE_CHARSET_BITMAP_SIZE; i++) + bitmap[i] ^= 0xFF; + } + + cs->mode = MXCHARSET_UCS2MODE; + cs->lookup = (void *)lookup; + return 0; + + onError: + if (lookup) + PyMem_Free((void *)lookup); + cs->lookup = 0; + return -1; +} + +#endif + +/* allocation */ + +static +PyObject *mxCharSet_New(PyObject *definition) +{ + mxCharSetObject *cs; + + cs = PyObject_NEW(mxCharSetObject, &mxCharSet_Type); + if (cs == NULL) + return NULL; + Py_INCREF(definition); + cs->definition = definition; + cs->lookup = NULL; + cs->mode = -1; + + if (PyString_Check(definition)) { + if (init_string_charset(cs, definition)) + goto onError; + } +#ifdef HAVE_UNICODE + else if (PyUnicode_Check(definition)) { + if (init_unicode_charset(cs, definition)) + goto onError; + } +#endif + else + Py_Error(PyExc_TypeError, + "character set definition must be string or unicode"); + + return (PyObject *)cs; + + onError: + Py_DECREF(cs); + return NULL; +} + +Py_C_Function( mxCharSet_CharSet, + "CharSet(definition)\n\n" + "Create a character set matching object from the string" + ) +{ + Py_Assert(PySequence_Check(args),PyExc_TypeError,"Arguments are not a tuple?"); + PyObject *definition; + + Py_GetArg("O:CharSet", definition); + return mxCharSet_New(definition); + + onError: + return NULL; +} + +static +void mxCharSet_Free(mxCharSetObject *cs) +{ + Py_XDECREF(cs->definition); + if (cs->lookup) + PyMem_Free(cs->lookup); + PyObject_Del(cs); +} + +/* C APIs */ + +#define cs ((mxCharSetObject *)self) + +int mxCharSet_ContainsChar(PyObject *self, + register unsigned char ch) +{ + if (!mxCharSet_Check(self)) { + PyErr_BadInternalCall(); + goto onError; + } + + if (cs->mode == MXCHARSET_8BITMODE) { + unsigned char *bitmap = ((string_charset *)cs->lookup)->bitmap; + return ((bitmap[ch >> 3] & (1 << (ch & 7))) != 0); + } +#ifdef HAVE_UNICODE + else if (cs->mode == MXCHARSET_UCS2MODE) { + unicode_charset *lookup = (unicode_charset *)cs->lookup; + unsigned char *bitmap = lookup->bitmaps[lookup->bitmapindex[0]]; + return ((bitmap[ch >> 3] & (1 << (ch & 7))) != 0); + } +#endif + else { + Py_Error(mxTextTools_Error, + "unsupported character set mode"); + } + + onError: + return -1; +} + +#ifdef HAVE_UNICODE + +int mxCharSet_ContainsUnicodeChar(PyObject *self, + register Py_UNICODE ch) +{ + if (!mxCharSet_Check(self)) { + PyErr_BadInternalCall(); + 
goto onError; + } + + if (cs->mode == MXCHARSET_8BITMODE) { + unsigned char *bitmap = ((string_charset *)cs->lookup)->bitmap; + if (ch >= 256) + return 0; + return ((bitmap[ch >> 3] & (1 << (ch & 7))) != 0); + } + else if (cs->mode == MXCHARSET_UCS2MODE) { + unicode_charset *lookup = (unicode_charset *)cs->lookup; + unsigned char *bitmap = lookup->bitmaps[lookup->bitmapindex[ch >> 8]]; + return ((bitmap[(ch >> 3) & 31] & (1 << (ch & 7))) != 0); + } + else { + Py_Error(mxTextTools_Error, + "unsupported character set mode"); + } + + onError: + return -1; +} + +#endif + +static +int mxCharSet_Contains(PyObject *self, + PyObject *other) +{ + if (PyString_Check(other)) { + Py_Assert(PyString_GET_SIZE(other) == 1, + PyExc_TypeError, + "expected a single character"); + return mxCharSet_ContainsChar(self, PyString_AS_STRING(other)[0]); + } +#ifdef HAVE_UNICODE + else if (PyUnicode_Check(other)) { + Py_Assert(PyUnicode_GET_SIZE(other) == 1, + PyExc_TypeError, + "expected a single unicode character"); + return mxCharSet_ContainsUnicodeChar(self, + PyUnicode_AS_UNICODE(other)[0]); + } +#endif + else + Py_Error(PyExc_TypeError, + "expected string or unicode character"); + + onError: + return -1; +} + +/* In mode 1, find the position of the first character in text + belonging to set. This may also be stop or start-1 in case no such + character is found during the search (depending on the direction). + + In mode 0, find the first character not in set. This may also be + stop or start-1 in case no such character is found during the + search (depending on the direction). + + The search is done in the slice start:stop. + + -2 is returned in case of an error. + +*/ + +static +int mxCharSet_FindChar(PyObject *self, + unsigned char *text, + Py_ssize_t start, + Py_ssize_t stop, + const int mode, + const int direction) +{ + register Py_ssize_t i; + register unsigned int c; + register unsigned int block; + unsigned char *bitmap; + + if (!mxCharSet_Check(self)) { + PyErr_BadInternalCall(); + goto onError; + } + + if (cs->mode == MXCHARSET_8BITMODE) + bitmap = ((string_charset *)cs->lookup)->bitmap; +#ifdef HAVE_UNICODE + else if (cs->mode == MXCHARSET_UCS2MODE) { + unicode_charset *lookup = (unicode_charset *)cs->lookup; + bitmap = lookup->bitmaps[lookup->bitmapindex[0]]; + } +#endif + else { + Py_Error(mxTextTools_Error, + "unsupported character set mode"); + } + + if (direction > 0) { + if (mode) + /* Find first char in set */ + for (i = start; i < stop; i++) { + c = text[i]; + block = bitmap[c >> 3]; + if (block && ((block & (1 << (c & 7))) != 0)) + break; + } + else + /* Find first char not in set */ + for (i = start; i < stop; i++) { + c = text[i]; + block = bitmap[c >> 3]; + if (!block || ((block & (1 << (c & 7))) == 0)) + break; + } + } + else { + if (mode) + /* Find first char in set, searching from the end */ + for (i = stop - 1; i >= start; i--) { + c = text[i]; + block = bitmap[c >> 3]; + if (block && ((block & (1 << (c & 7))) != 0)) + break; + } + else + /* Find first char not in set, searching from the end */ + for (i = stop - 1; i >= start; i--) { + c = text[i]; + block = bitmap[c >> 3]; + if (!block || ((block & (1 << (c & 7))) == 0)) + break; + } + } + return i; + + onError: + return -2; +} + +#ifdef HAVE_UNICODE + +static +int mxCharSet_FindUnicodeChar(PyObject *self, + Py_UNICODE *text, + Py_ssize_t start, + Py_ssize_t stop, + const int mode, + const int direction) +{ + register int i; + register unsigned int c; + register unsigned int block; + unsigned char *bitmap; + + if 
(!mxCharSet_Check(self)) { + PyErr_BadInternalCall(); + goto onError; + } + + if (cs->mode == MXCHARSET_8BITMODE) { + bitmap = ((string_charset *)cs->lookup)->bitmap; + if (direction > 0) { + if (mode) + /* Find first char in set */ + for (i = start; i < stop; i++) { + c = text[i]; + if (c > 256) + continue; + block = bitmap[c >> 3]; + if (block && ((block & (1 << (c & 7))) != 0)) + break; + } + else + /* Find first char not in set */ + for (i = start; i < stop; i++) { + c = text[i]; + if (c > 256) + break; + block = bitmap[c >> 3]; + if (!block || ((block & (1 << (c & 7))) == 0)) + break; + } + } + else { + if (mode) + /* Find first char in set, searching from the end */ + for (i = stop - 1; i >= start; i--) { + c = text[i]; + if (c > 256) + continue; + block = bitmap[c >> 3]; + if (block && ((block & (1 << (c & 7))) != 0)) + break; + } + else + /* Find first char not in set, searching from the end */ + for (i = stop - 1; i >= start; i--) { + c = text[i]; + if (c > 256) + break; + block = bitmap[c >> 3]; + if (!block || ((block & (1 << (c & 7))) == 0)) + break; + } + } + return i; + } + +#ifdef HAVE_UNICODE + else if (cs->mode == MXCHARSET_UCS2MODE) { + unicode_charset *lookup = (unicode_charset *)cs->lookup; + if (direction > 0) { + if (mode) + /* Find first char in set */ + for (i = start; i < stop; i++) { + c = text[i]; + bitmap = lookup->bitmaps[lookup->bitmapindex[c >> 8]]; + block = bitmap[(c >> 3) & 31]; + if (block && ((block & (1 << (c & 7))) != 0)) + break; + } + else + /* Find first char not in set */ + for (i = start; i < stop; i++) { + c = text[i]; + bitmap = lookup->bitmaps[lookup->bitmapindex[c >> 8]]; + block = bitmap[(c >> 3) & 31]; + if (!block || ((block & (1 << (c & 7))) == 0)) + break; + } + } + else { + if (mode) + /* Find first char in set, searching from the end */ + for (i = stop - 1; i >= start; i--) { + c = text[i]; + bitmap = lookup->bitmaps[lookup->bitmapindex[c >> 8]]; + block = bitmap[(c >> 3) & 31]; + if (block && ((block & (1 << (c & 7))) != 0)) + break; + } + else + /* Find first char not in set, searching from the end */ + for (i = stop - 1; i >= start; i--) { + c = text[i]; + bitmap = lookup->bitmaps[lookup->bitmapindex[c >> 8]]; + block = bitmap[(c >> 3) & 31]; + if (!block || ((block & (1 << (c & 7))) == 0)) + break; + } + } + return i; + } +#endif + else { + Py_Error(mxTextTools_Error, + "unsupported character set mode"); + } + + onError: + return -2; +} + +#endif + +/* Return the position of the first character in text[start:stop] + occurring in set or -1 in case no such character exists. + +*/ + +static +int mxCharSet_Search(PyObject *self, + PyObject *text, + Py_ssize_t start, + Py_ssize_t stop, + int direction) +{ + Py_ssize_t position; + + if (PyString_Check(text)) { + Py_CheckStringSlice(text, start, stop); + position = mxCharSet_FindChar(self, + (unsigned char *)PyString_AS_STRING(text), + start, + stop, + 1, + direction); + } +#ifdef HAVE_UNICODE + else if (PyUnicode_Check(text)) { + Py_CheckUnicodeSlice(text, start, stop); + position = mxCharSet_FindUnicodeChar(self, + PyUnicode_AS_UNICODE(text), + start, + stop, + 1, + direction); + } +#endif + else + Py_Error(PyExc_TypeError, + "expected string or unicode"); + + if ((direction > 0 && position >= stop) || + (direction <= 0 && position < start)) + position = -1; + return position; + + onError: + return -2; +} + +/* Return the longest match of characters from set in + text[start:stop]. 
+ + If direction is positive, the search is done from the left (longest + prefix), otherwise it is started from the right (longest suffix). + + -1 is returned in case of an error. + +*/ + +Py_ssize_t mxCharSet_Match(PyObject *self, + PyObject *text, + Py_ssize_t start, + Py_ssize_t stop, + int direction) +{ + Py_ssize_t position; + + if (PyString_Check(text)) { + Py_CheckStringSlice(text, start, stop); + position = mxCharSet_FindChar(self, + (unsigned char *)PyString_AS_STRING(text), + start, + stop, + 0, + direction); + } +#ifdef HAVE_UNICODE + else if (PyUnicode_Check(text)) { + Py_CheckUnicodeSlice(text, start, stop); + position = mxCharSet_FindUnicodeChar(self, + PyUnicode_AS_UNICODE(text), + start, + stop, + 0, + direction); + } +#endif + else + Py_Error(PyExc_TypeError, + "expected string or unicode"); + + if (position < -1) + goto onError; + if (direction > 0) + return position - start; + else + return stop-1 - position; + + onError: + return -1; +} + +/* Stips off characters appearing in the character set from text[start:stop] + and returns the result as Python string object. + + where indicates the mode: + where < 0: strip left only + where = 0: strip left and right + where > 0: strip right only + +*/ +static +PyObject *mxCharSet_Strip(PyObject *self, + PyObject *text, + Py_ssize_t start, + Py_ssize_t stop, + Py_ssize_t where) +{ + Py_ssize_t left,right; + + if (!mxCharSet_Check(self)) { + PyErr_BadInternalCall(); + goto onError; + } + + if (PyString_Check(text)) { + Py_CheckStringSlice(text, start, stop); + + /* Strip left */ + if (where <= 0) { + left = mxCharSet_FindChar(self, + (unsigned char *)PyString_AS_STRING(text), + start, + stop, + 0, + 1); + if (left < 0) + goto onError; + } + else + left = start; + + /* Strip right */ + if (where >= 0) { + right = mxCharSet_FindChar(self, + (unsigned char *)PyString_AS_STRING(text), + left, + stop, + 0, + -1) + 1; + if (right < 0) + goto onError; + } + else + right = stop; + + return PyString_FromStringAndSize(PyString_AS_STRING(text) + left, + max(right - left, 0)); + } +#ifdef HAVE_UNICODE + else if (PyUnicode_Check(text)) { + Py_CheckUnicodeSlice(text, start, stop); + + /* Strip left */ + if (where <= 0) { + left = mxCharSet_FindUnicodeChar(self, + PyUnicode_AS_UNICODE(text), + start, + stop, + 0, + 1); + if (left < 0) + goto onError; + } + else + left = start; + + /* Strip right */ + if (where >= 0) { + right = mxCharSet_FindUnicodeChar(self, + PyUnicode_AS_UNICODE(text), + start, + stop, + 0, + -1) + 1; + if (right < 0) + goto onError; + } + else + right = stop; + + return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(text) + left, + max(right - left, 0)); + } +#endif + else + Py_Error(PyExc_TypeError, + "expected string or unicode"); + + onError: + return NULL; +} + +static +PyObject *mxCharSet_Split(PyObject *self, + PyObject *text, + Py_ssize_t start, + Py_ssize_t text_len, + int include_splits) +{ + PyObject *list = NULL; + PyObject *s; + register Py_ssize_t x; + Py_ssize_t listitem = 0; + Py_ssize_t listsize = INITIAL_LIST_SIZE; + + if (!mxCharSet_Check(self)) { + PyErr_BadInternalCall(); + goto onError; + } + + list = PyList_New(listsize); + if (!list) + goto onError; + + if (PyString_Check(text)) { + unsigned char *tx = (unsigned char *)PyString_AS_STRING(text); + + Py_CheckStringSlice(text, start, text_len); + + x = start; + while (x < text_len) { + Py_ssize_t z; + + /* Skip all text in set (include_splits == 0), not in set + (include_splits == 1) */ + z = x; + x = mxCharSet_FindChar(self, tx, x, text_len, include_splits, 
1); + + /* Append the slice to list */ + if (include_splits) { + s = PyString_FromStringAndSize((char *)&tx[z], x - z); + if (!s) + goto onError; + if (listitem < listsize) + PyList_SET_ITEM(list,listitem,s); + else { + PyList_Append(list,s); + Py_DECREF(s); + } + listitem++; + + if (x >= text_len) + break; + } + + /* Skip all text in set (include_splits == 1), not in set + (include_splits == 0) */ + z = x; + x = mxCharSet_FindChar(self, tx, x, text_len, !include_splits, 1); + + /* Append the slice to list if it is not empty */ + if (x > z) { + s = PyString_FromStringAndSize((char *)&tx[z], x - z); + if (!s) + goto onError; + if (listitem < listsize) + PyList_SET_ITEM(list,listitem,s); + else { + PyList_Append(list,s); + Py_DECREF(s); + } + listitem++; + } + } + + } +#ifdef HAVE_UNICODE + else if (PyUnicode_Check(text)) { + Py_UNICODE *tx = PyUnicode_AS_UNICODE(text); + + Py_CheckUnicodeSlice(text, start, text_len); + + x = start; + while (x < text_len) { + Py_ssize_t z; + + /* Skip all text in set (include_splits == 0), not in set + (include_splits == 1) */ + z = x; + x = mxCharSet_FindUnicodeChar(self, tx, x, text_len, include_splits, 1); + + /* Append the slice to list */ + if (include_splits) { + s = PyUnicode_FromUnicode(&tx[z], x - z); + if (!s) + goto onError; + if (listitem < listsize) + PyList_SET_ITEM(list,listitem,s); + else { + PyList_Append(list,s); + Py_DECREF(s); + } + listitem++; + + if (x >= text_len) + break; + } + + /* Skip all text in set (include_splits == 1), not in set + (include_splits == 0) */ + z = x; + x = mxCharSet_FindUnicodeChar(self, tx, x, text_len, !include_splits, 1); + + /* Append the slice to list if it is not empty */ + if (x > z) { + s = PyUnicode_FromUnicode(&tx[z], x - z); + if (!s) + goto onError; + if (listitem < listsize) + PyList_SET_ITEM(list,listitem,s); + else { + PyList_Append(list,s); + Py_DECREF(s); + } + listitem++; + } + } + } +#endif + else + Py_Error(PyExc_TypeError, + "expected string or unicode"); + + /* Resize list if necessary */ + if (listitem < listsize) + PyList_SetSlice(list, listitem, listsize, (PyObject*)NULL); + + return list; + + onError: + Py_XDECREF(list); + return NULL; +} + +/* methods */ + +Py_C_Function( mxCharSet_contains, + ".contains(char)\n\n" + ) +{ + Py_Assert(PySequence_Check(args),PyExc_TypeError,"Arguments are not a tuple?"); + PyObject *chr; + int rc; + + Py_GetArg("O:CharSet.contains", chr); + + rc = mxCharSet_Contains(self, chr); + if (rc < 0) + goto onError; + return PyInt_FromLong(rc); + + onError: + return NULL; +} + +Py_C_Function( mxCharSet_search, + ".search(text[, direction=1, start=0, stop=len(text)])\n\n" + ) +{ + Py_Assert(PySequence_Check(args),PyExc_TypeError,"Arguments are not a tuple?"); + PyObject *text; + int direction = 1; + Py_ssize_t start = 0, stop = INT_MAX; + int rc; + + Py_Get4Args("O|iii:CharSet.search", text, direction, start, stop); + + rc = mxCharSet_Search(self, text, start, stop, direction); + if (rc == -1) + Py_ReturnNone(); + if (rc < -1) + goto onError; + return PyInt_FromLong(rc); + + onError: + return NULL; +} + +Py_C_Function( mxCharSet_match, + ".match(text[, direction=1, start=0, stop=len(text)])\n\n" + ) +{ + Py_Assert(PySequence_Check(args),PyExc_TypeError,"Arguments are not a tuple?"); + PyObject *text; + int direction = 1; + Py_ssize_t start = 0, stop = INT_MAX; + int rc; + + Py_Get4Args("O|iii:CharSet.match", text, direction, start, stop); + + rc = mxCharSet_Match(self, text, start, stop, direction); + if (rc < 0) + goto onError; + return PyInt_FromLong(rc); + + 
onError: + return NULL; +} + +Py_C_Function( mxCharSet_split, + ".split(text[, start=0, stop=len(text)])\n\n" + ) +{ + Py_Assert(PySequence_Check(args),PyExc_TypeError,"Arguments are not a tuple?"); + PyObject *text; + Py_ssize_t start = 0, stop = INT_MAX; + + Py_Get3Args("O|ii:CharSet.split", text, start, stop); + + return mxCharSet_Split(self, text, start, stop, 0); + + onError: + return NULL; +} + +Py_C_Function( mxCharSet_splitx, + ".splitx(text[, start=0, stop=len(text)])\n\n" + ) +{ + Py_Assert(PySequence_Check(args),PyExc_TypeError,"Arguments are not a tuple?"); + PyObject *text; + Py_ssize_t start = 0, stop = INT_MAX; + + Py_Get3Args("O|ii:CharSet.splitx", text, start, stop); + + return mxCharSet_Split(self, text, start, stop, 1); + + onError: + return NULL; +} + +Py_C_Function( mxCharSet_strip, + ".strip(text[, where=0, start=0, stop=len(text)])\n\n" + ) +{ + Py_Assert(PySequence_Check(args),PyExc_TypeError,"Arguments are not a tuple?"); + PyObject *text; + Py_ssize_t where = 0; + Py_ssize_t start = 0, stop = INT_MAX; + + Py_Get4Args("O|iii:CharSet.strip", text, where, start, stop); + + return mxCharSet_Strip(self, text, start, stop, where); + + onError: + return NULL; +} + +#ifdef COPY_PROTOCOL +Py_C_Function( mxCharSet_copy, + "copy([memo])\n\n" + "Return a new reference for the instance. This function\n" + "is used for the copy-protocol. Real copying doesn't take\n" + "place, since the instances are immutable.") +{ + Py_Assert(PySequence_Check(args),PyExc_TypeError,"Arguments are not a tuple?"); + PyObject *memo; + + Py_GetArg("|O",memo); + Py_INCREF(cs); + return (PyObject *)cs; + onError: + return NULL; +} +#endif + +#undef cs + +/* --- slots --- */ + +static +PyObject *mxCharSet_Repr(mxCharSetObject *self) +{ + PyObject *v; + char t[500], *reprstr; + + v = PyObject_Repr(self->definition); + if (v == NULL) + return NULL; + reprstr = PyString_AsString(v); + if (reprstr == NULL) + return NULL; + sprintf(t, "", + reprstr, (long)self); + Py_DECREF(v); + return PyString_FromString(t); +} + +static +PyObject *mxCharSet_GetAttr(mxCharSetObject *self, + char *name) +{ + PyObject *v; + + if (Py_WantAttr(name,"definition")) { + v = self->definition; + Py_INCREF(v); + return v; + } + + else if (Py_WantAttr(name,"__members__")) + return Py_BuildValue("[s]", + "definition"); + + return Py_FindMethod(mxCharSet_Methods, (PyObject *)self, (char *)name); +} + +/* Python Type Tables */ + +static +PySequenceMethods mxCharSet_TypeAsSequence = { + (lenfunc)0, /*sq_length*/ + (binaryfunc)0, /*sq_concat*/ + (ssizeargfunc)0, /*sq_repeat*/ + (ssizeargfunc)0, /*sq_item*/ + (ssizessizeargfunc)0, /*sq_slice*/ + (ssizeobjargproc)0, /*sq_ass_item*/ + (ssizessizeobjargproc)0, /*sq_ass_slice*/ +#if PY_VERSION_HEX >= 0x02000000 + (objobjproc)mxCharSet_Contains, /*sq_contains*/ +#endif +}; + +PyTypeObject mxCharSet_Type = { + PyObject_HEAD_INIT(0) /* init at startup ! 
*/ + 0, /* ob_size */ + "Character Set", /* tp_name */ + sizeof(mxCharSetObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)mxCharSet_Free, /* tp_dealloc */ + (printfunc)0, /* tp_print */ + (getattrfunc)mxCharSet_GetAttr, /* tp_getattr */ + (setattrfunc)0, /* tp_setattr */ + (cmpfunc)0, /* tp_compare */ + (reprfunc)mxCharSet_Repr, /* tp_repr */ + 0, /* tp_as_number */ + &mxCharSet_TypeAsSequence, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + (hashfunc)0, /* tp_hash */ + (ternaryfunc)0, /* tp_call */ + (reprfunc)0, /* tp_str */ + (getattrofunc)0, /* tp_getattro */ + (setattrofunc)0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + (char*) 0, /* tp_doc */ +}; + +/* Python Method Table */ + +statichere +PyMethodDef mxCharSet_Methods[] = +{ + Py_MethodListEntry("contains",mxCharSet_contains), + Py_MethodListEntry("search",mxCharSet_search), + Py_MethodListEntry("match",mxCharSet_match), + Py_MethodListEntry("strip",mxCharSet_strip), + Py_MethodListEntry("split",mxCharSet_split), + Py_MethodListEntry("splitx",mxCharSet_splitx), +#ifdef COPY_PROTOCOL + Py_MethodListEntry("__deepcopy__",mxCharSet_copy), + Py_MethodListEntry("__copy__",mxCharSet_copy), +#endif + {NULL,NULL} /* end of list */ +}; + +/* --- Tag Table Object ------------------------------------------------*/ + +staticforward PyMethodDef mxTagTable_Methods[]; + +PyObject *mxTagTable_New(PyObject *definition, + int tabletype, + int cacheable); + +/* internal APIs */ + +static +PyObject *tc_get_item(register PyObject *obj, + register Py_ssize_t i) +{ + if (PyTuple_Check(obj)) { + if (i > PyTuple_GET_SIZE(obj)) + return NULL; + return PyTuple_GET_ITEM(obj, i); + } + else if (PyList_Check(obj)) { + if (i > PyList_GET_SIZE(obj)) + return NULL; + return PyList_GET_ITEM(obj, i); + } + else + return NULL; +} + +static +Py_ssize_t tc_length(register PyObject *obj) +{ + if (obj == NULL) + return -1; + else if (PyTuple_Check(obj)) + return PyTuple_GET_SIZE(obj); + else if (PyList_Check(obj)) + return PyList_GET_SIZE(obj); + else + return -1; +} + +/* Add a jump target to the jump dictionary */ + +static +Py_ssize_t tc_add_jumptarget(PyObject *jumpdict, + PyObject *targetname, + Py_ssize_t index) +{ + PyObject *v; + + v = PyDict_GetItem(jumpdict, targetname); + if (v != NULL) + Py_ErrorWithArg(PyExc_TypeError, + "tag table entry %d: " + "jump target already defined", (unsigned int) index); + v = PyInt_FromLong(index); + if (v == NULL) + goto onError; + if (PyDict_SetItem(jumpdict, targetname, v)) + goto onError; + Py_DECREF(v); + return 0; + + onError: + return -1; +} + +/* Convert a string command argument to either an 8-bit string or + Unicode depending on the tabletype. 
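+
+   For example (a sketch, not taken from these sources): in a table
+   compiled via UnicodeTagTable(), an entry such as (None, AllIn, 'abc')
+   gets its 'abc' argument decoded to u'abc' here, so the tagging engine
+   always compares text and command argument of the same type.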
*/ + +static +PyObject *tc_convert_string_arg(PyObject *arg, + Py_ssize_t tableposition, + int tabletype) +{ + /* Convert to strings */ + if (tabletype == MXTAGTABLE_STRINGTYPE) { + if (PyString_Check(arg)) + return arg; +#ifdef HAVE_UNICODE + else if (PyUnicode_Check(arg)) { + Py_DECREF(arg); + arg = PyUnicode_AsEncodedString(arg, + NULL, + NULL); + if (arg == NULL) + Py_ErrorWithArg(PyExc_TypeError, + "tag table entry %d: " + "conversion from Unicode to " + "string failed", (unsigned int)tableposition); + } +#endif + else + Py_ErrorWithArg(PyExc_TypeError, + "tag table entry %d: " + "command argument must be a " + "string or unicode", (unsigned int)tableposition); + } + +#ifdef HAVE_UNICODE + /* Convert to Unicode */ + else if (tabletype == MXTAGTABLE_UNICODETYPE) { + if (PyUnicode_Check(arg)) + return arg; + else if (PyString_Check(arg)) { + Py_DECREF(arg); + arg = PyUnicode_Decode(PyString_AS_STRING(arg), + PyString_GET_SIZE(arg), + NULL, + NULL); + if (arg == NULL) + Py_ErrorWithArg(PyExc_TypeError, + "tag table entry %d: " + "conversion from string to " + "Unicode failed", (unsigned int)tableposition); + } + else + Py_ErrorWithArg(PyExc_TypeError, + "tag table entry %d: " + "command argument must be a " + "string or unicode", (unsigned int)tableposition); + } +#endif + + else + Py_Error(mxTextTools_Error, + "unsupported table type"); + + return arg; + + onError: + return NULL; +} + +/* Cleanup any references in the tag table. */ + +static +int tc_cleanup(mxTagTableObject *tagtable) +{ + Py_ssize_t i; + for (i = 0; i < tagtable->ob_size; i++) { + mxTagTableEntry *tagtableentry = &tagtable->entry[i]; + + Py_XDECREF(tagtableentry->tagobj); + tagtableentry->tagobj = NULL; + Py_XDECREF(tagtableentry->args); + tagtableentry->args = NULL; + } + return 0; +} + +/* Initialize the tag table (this is the actual Tag Table compiler) */ + +static +int init_tag_table(mxTagTableObject *tagtable, + PyObject *table, + Py_ssize_t size, + int tabletype, + int cacheable) +{ + Py_ssize_t i; + PyObject *entry; + Py_ssize_t entry_len; + PyObject *tagobj, *command, *args = 0, *je, *jne; + PyObject *jumpdict, *v; + int secondpass, own_args = 0; + + jumpdict = PyDict_New(); + if (jumpdict == NULL) + return -1; + + /* Reset to all fields to 0 */ + memset(&tagtable->entry[0], 0, size * sizeof(mxTagTableEntry)); + + /* First pass */ + secondpass = 0; + for (i = 0; i < size; i++) { + mxTagTableEntry *tagtableentry = &tagtable->entry[i]; + + /* Get table entry i and parse it */ + entry = tc_get_item(table, i); + if (entry == NULL) { + Py_ErrorWithArg(PyExc_TypeError, + "tag table entry %d: " + "not found or not a supported entry type", (unsigned int)i); + } + + /* Special handling for jump marks (args is set to the jump + mark string, jump target index is the next table entry) */ + if (PyString_Check(entry)) { + if (tc_add_jumptarget(jumpdict, entry, i + 1)) + goto onError; + tagtableentry->tagobj = NULL; + tagtableentry->cmd = MATCH_JUMPTARGET; + tagtableentry->flags = 0; + Py_INCREF(entry); + tagtableentry->args = entry; + tagtableentry->jne = 0; + tagtableentry->je = 1; + continue; + } + + /* Get entry length */ + entry_len = tc_length(entry); + if (entry_len < 3) { + Py_ErrorWithArg(PyExc_TypeError, + "tag table entry %d: " + "expected an entry of the form " + "(tagobj,command,arg[,jne[,je]])", (unsigned int)i); + } + + /* Decode entry parts: (tagobj, command, args[, jne[, je]]) */ + tagobj = tc_get_item(entry, 0); + command = tc_get_item(entry, 1); + args = tc_get_item(entry, 2); + if (entry_len >= 4) + jne = 
tc_get_item(entry, 3); + else + jne = NULL; + if (entry_len >= 5) + je = tc_get_item(entry, 4); + else + je = NULL; + + if (tagobj == NULL || + command == NULL || + args == NULL || + (entry_len >= 4 && jne == NULL) || + (entry_len >= 5 && je == NULL)) { + Py_ErrorWithArg(PyExc_TypeError, + "tag table entry %d: " + "expected an entry of the form " + "(tagobj,command,arg[,jne[,je]])",(unsigned int) i); + } + + /* Store tagobj, None gets converted to NULL */ + if (tagobj != Py_None) + Py_INCREF(tagobj); + else + tagobj = NULL; + tagtableentry->tagobj = tagobj; + + /* Decode command and flags */ + Py_AssertWithArg(PyInt_Check(command), + PyExc_TypeError, + "tag table entry %d: " + "command must be an integer",(unsigned int)i); + tagtableentry->cmd = PyInt_AS_LONG(command) & 0xFF; + tagtableentry->flags = PyInt_AS_LONG(command) - tagtableentry->cmd; + + /* Check command arguments */ + Py_INCREF(args); + own_args = 1; + + switch (tagtableentry->cmd) { + + case MATCH_JUMP: /* == MATCH_FAIL */ + case MATCH_EOF: + case MATCH_LOOP: + /* args is ignored */ + break; + + case MATCH_SKIP: + case MATCH_MOVE: + case MATCH_LOOPCONTROL: + Py_AssertWithArg(PyInt_Check(args), + PyExc_TypeError, + "tag table entry %d: " + "Skip|Move|LoopControl command argument " + "must be an integer", (unsigned int)i); + break; + + case MATCH_JUMPTARGET: + Py_AssertWithArg(PyString_Check(args), + PyExc_TypeError, + "tag table entry %d: " + "JumpMark command argument must be a string",(unsigned int)i); + if (tc_add_jumptarget(jumpdict, args, i + 1)) + goto onError; + break; + + case MATCH_ALLIN: + case MATCH_ALLNOTIN: + case MATCH_IS: + case MATCH_ISIN: + case MATCH_ISNOTIN: + case MATCH_WORD: + case MATCH_WORDSTART: + case MATCH_WORDEND: + args = tc_convert_string_arg(args, i, tabletype); + if (args == NULL) + goto onError; + break; + + case MATCH_ALLINSET: + case MATCH_ISINSET: + Py_AssertWithArg(PyString_Check(args) && + PyString_GET_SIZE(args) == 32, + PyExc_TypeError, + "tag table entry %d: " + "AllInSet|IsInSet command argument must " + "be a set() string",(unsigned int)i); + break; + + case MATCH_ALLINCHARSET: + case MATCH_ISINCHARSET: + Py_AssertWithArg(mxCharSet_Check(args), + PyExc_TypeError, + "tag table entry %d: " + "AllInCharSet|IsInCharSet command argument must " + "be a CharSet instance",(unsigned int)i); + break; + + case MATCH_SWORDSTART: /* == MATCH_NOWORD */ + case MATCH_SWORDEND: + case MATCH_SFINDWORD: + Py_AssertWithArg(mxTextSearch_Check(args), + PyExc_TypeError, + "tag table entry %d: " + "sWordStart|sWordEnd|sFindWord command " + "argument must be a TextSearch search " + "object",(unsigned int)i); + break; + + case MATCH_TABLE: + case MATCH_SUBTABLE: + Py_AssertWithArg(mxTagTable_Check(args) || + PyTuple_Check(args) || + PyList_Check(args) || + (PyInt_Check(args) && + PyInt_AS_LONG(args) == MATCH_THISTABLE), + PyExc_TypeError, + "tag table entry %d: " + "Table|SubTable command argument " + "must be a tag table tuple/object or " + "ThisTable", (unsigned int)i); + /* XXX We shouldn't recursively compile tag table tuples here + because this will slow down the compile process + too much and it's not clear whether this particular + table will ever be used during tagging. 
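+
+   (Sketch of such a nested entry, not from the sources:
+        (None, Table, ((None, AllIn, '0123456789'),))
+    the inner tuple is what gets compiled into its own TagTable object
+    by the call below.)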
+ */ + if (!mxTagTable_Check(args) && !PyInt_Check(args)) { + Py_DECREF(args); + args = mxTagTable_New(args, tabletype, cacheable); + if (args == NULL) + goto onError; + } + break; + + case MATCH_TABLEINLIST: + case MATCH_SUBTABLEINLIST: + Py_AssertWithArg(PyTuple_Check(args) && + PyTuple_GET_SIZE(args) == 2 && + PyList_Check(PyTuple_GET_ITEM(args, 0)) && + PyInt_Check(PyTuple_GET_ITEM(args, 1)), + PyExc_TypeError, + "tag table entry %d: " + "TableInList|SubTableInList command argument " + "must be a 2-tuple (list, integer)", + (unsigned int)i); + break; + + case MATCH_CALL: + Py_AssertWithArg(PyCallable_Check(args), + PyExc_TypeError, + "tag table entry %d: " + "Call command argument " + "must be a callable object", + (unsigned int)i); + break; + + case MATCH_CALLARG: + Py_AssertWithArg(PyTuple_Check(args) && + PyTuple_GET_SIZE(args) > 0 && + PyCallable_Check(PyTuple_GET_ITEM(args, 0)), + PyExc_TypeError, + "tag table entry %d: " + "CallArg command argument " + "must be a tuple (fct,[arg0,arg1,...])", + (unsigned int)i); + break; + + default: + Py_ErrorWith2Args(PyExc_TypeError, + "tag table entry %d: " + "unknown command integer: %i", + (unsigned int)i, tagtableentry->cmd); + + } + + /* Store command args */ + tagtableentry->args = args; + own_args = 0; + + /* Decode jump offsets */ + if (jne) { + if (PyInt_Check(jne)) + tagtableentry->jne = PyInt_AS_LONG(jne); + else if (PyString_Check(jne)) { + /* Mark for back-patching */ + tagtableentry->jne = -424242; + secondpass = 1; + } + else + Py_ErrorWithArg(PyExc_TypeError, + "tag table entry %d: " + "jne must be an integer or string", (unsigned int)i); + } + else + tagtableentry->jne = 0; + + if (je) { + if (PyInt_Check(je)) + tagtableentry->je = PyInt_AS_LONG(je); + else if (PyString_Check(je)) { + /* Mark for back-patching */ + tagtableentry->je = -424242; + secondpass = 1; + } + else + Py_ErrorWithArg(PyExc_TypeError, + "tag table entry %d: " + "je must be an integer or string", (unsigned int)i); + } + else + tagtableentry->je = 1; + } + + /* Second pass (needed to patch string jump targets) */ + if (secondpass) + for (i = 0; i < size; i++) { + mxTagTableEntry *tagtableentry = &tagtable->entry[i]; + + if (tagtableentry->je != -424242 && + tagtableentry->jne != -424242) + continue; + + /* Entry (most probably) needs back-patching */ + entry = tc_get_item(table, i); + if (entry == NULL) { + Py_ErrorWithArg(PyExc_TypeError, + "tag table entry %d: " + "unexpected error (not found)", (unsigned int)i); + } + + /* Get entry length */ + entry_len = tc_length(entry); + if (entry_len < 0) { + Py_ErrorWithArg(PyExc_TypeError, + "tag table entry %d: " + "unexpected error (no length)", (unsigned int)i); + } + + /* Decode jump offsets */ + if (entry_len >= 4) + jne = tc_get_item(entry, 3); + else + jne = NULL; + if (entry_len >= 5) + je = tc_get_item(entry, 4); + else + je = NULL; + + /* Patch jump offsets */ + if (jne && PyString_Check(jne)) { + v = PyDict_GetItem(jumpdict, jne); + if (v == NULL || !PyInt_Check(v)) + Py_ErrorWith2Args(PyExc_TypeError, + "tag table entry %d: " + "jne jump target '%s' not found", + (unsigned int)i, PyString_AS_STRING(jne)); + tagtableentry->jne = PyInt_AS_LONG(v) - i; + } + if (je && PyString_Check(je)) { + v = PyDict_GetItem(jumpdict, je); + if (v == NULL || !PyInt_Check(v)) + Py_ErrorWith2Args(PyExc_TypeError, + "tag table entry %d: " + "je jump target '%s' not found", + (unsigned int)i, PyString_AS_STRING(je)); + tagtableentry->je = PyInt_AS_LONG(v) - i; + } + } + + Py_DECREF(jumpdict); + return 0; + + onError: + if 
(own_args) { + Py_DECREF(args); + } + return -1; +} + +/* Check the cache for an already compiled TagTable for this + definition. Return NULL in case of an error, Py_None without + INCREF in case no such table was found or the TagTable object. */ + +static +PyObject *consult_tagtable_cache(PyObject *definition, + int tabletype, + int cacheable) +{ + PyObject *v, *key, *tt; + + if (!PyTuple_Check(definition) || !cacheable) + return Py_None; + + key = PyTuple_New(2); + if (key == NULL) + goto onError; + v = PyInt_FromLong((long) definition); + if (v == NULL) + goto onError; + PyTuple_SET_ITEM(key, 0, v); + v = PyInt_FromLong(tabletype); + if (v == NULL) + goto onError; + PyTuple_SET_ITEM(key, 1, v); + tt = PyDict_GetItem(mxTextTools_TagTables, key); + Py_DECREF(key); + if (tt != NULL) { + Py_INCREF(tt); + return tt; + } + return Py_None; + + onError: + return NULL; +} + +/* Adds the compiled tagtable to the cache. Returns -1 in case of an + error, 0 on success. */ + +static +int add_to_tagtable_cache(PyObject *definition, + int tabletype, + int cacheable, + PyObject *tagtable) +{ + PyObject *v, *key; + int rc; + + if (!PyTuple_Check(definition) || !cacheable) + return 0; + + key = PyTuple_New(2); + if (key == NULL) + goto onError; + v = PyInt_FromLong((long) definition); + if (v == NULL) + goto onError; + PyTuple_SET_ITEM(key, 0, v); + v = PyInt_FromLong(tabletype); + if (v == NULL) + goto onError; + PyTuple_SET_ITEM(key, 1, v); + + /* Hard-limit the cache size */ + if (PyDict_Size(mxTextTools_TagTables) >= MAX_TAGTABLES_CACHE_SIZE) + PyDict_Clear(mxTextTools_TagTables); + + rc = PyDict_SetItem(mxTextTools_TagTables, key, tagtable); + Py_DECREF(key); + if (rc) + goto onError; + return 0; + + onError: + return -1; +} + + +/* allocation */ + +PyObject *mxTagTable_New(PyObject *definition, + int tabletype, + int cacheable) +{ + mxTagTableObject *tagtable = 0; + PyObject *v; + Py_ssize_t size; + + /* First, consult the TagTable cache */ + v = consult_tagtable_cache(definition, tabletype, cacheable); + if (v == NULL) + goto onError; + else if (v != Py_None) + return v; + + size = tc_length(definition); + if (size < 0) + Py_Error(PyExc_TypeError, + "tag table definition must be a tuple or a list"); + + tagtable = PyObject_NEW_VAR(mxTagTableObject, &mxTagTable_Type, size); + if (tagtable == NULL) + goto onError; + if (cacheable) { + Py_INCREF(definition); + tagtable->definition = definition; + } + else + tagtable->definition = NULL; + tagtable->tabletype = tabletype; + + /* Compile table ... 
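+
+       (From Python this step can be triggered explicitly, e.g.
+        tt = TagTable(definition); tag(text, tt)
+        so that repeated tag() calls reuse the compiled table; a sketch
+        based on the constructors and the tag() function defined below.)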
*/ + if (init_tag_table(tagtable, definition, size, tabletype, cacheable)) + goto onError; + + /* Cache the compiled table if it is cacheable and derived from a + tuple */ + if (add_to_tagtable_cache(definition, tabletype, cacheable, + (PyObject *)tagtable)) + goto onError; + + return (PyObject *)tagtable; + + onError: + Py_XDECREF(tagtable); + return NULL; +} + +Py_C_Function( mxTagTable_TagTable, + "TagTable(definition[,cachable=1])\n\n" + ) +{ + Py_Assert(PySequence_Check(args),PyExc_TypeError,"Arguments are not a tuple?"); + PyObject *definition; + int cacheable = 1; + + Py_Get2Args("O|i:TagTable", definition, cacheable); + return mxTagTable_New(definition, 0, cacheable); + + onError: + return NULL; +} + +#ifdef HAVE_UNICODE +Py_C_Function( mxTagTable_UnicodeTagTable, + "TagTable(definition[,cachable=1])\n\n" + ) +{ + Py_Assert(PySequence_Check(args),PyExc_TypeError,"Arguments are not a tuple?"); + PyObject *definition; + int cacheable = 1; + + Py_Get2Args("O|i:UnicodeTagTable", definition, cacheable); + return mxTagTable_New(definition, 1, cacheable); + + onError: + return NULL; +} +#endif + +static +void mxTagTable_Free(mxTagTableObject *tagtable) +{ + tc_cleanup(tagtable); + Py_XDECREF(tagtable->definition); + PyObject_Del(tagtable); +} + +/* C APIs */ + +#define tagtable ((mxTagTableObject *)self) + +static +PyObject *mxTagTable_CompiledDefinition(PyObject *self) +{ + PyObject *tuple = 0, *v, *w; + Py_ssize_t i; + Py_ssize_t size; + + if (!mxTagTable_Check(self)) { + PyErr_BadInternalCall(); + goto onError; + } + + size = tagtable->ob_size; + tuple = PyTuple_New(size); + if (tuple == NULL) + goto onError; + + for (i = 0; i < size; i++) { + mxTagTableEntry *tagtableentry = &tagtable->entry[i]; + + /* Build tuple (tagobj, command, args, jne, je) */ + v = PyTuple_New(5); + if (v == NULL) + goto onError; + w = tagtableentry->tagobj; + if (w == NULL) + w = Py_None; + Py_INCREF(w); + PyTuple_SET_ITEM(v, 0, w); + PyTuple_SET_ITEM(v, 1, PyInt_FromLong(tagtableentry->cmd | + tagtableentry->flags)); + w = tagtableentry->args; + if (w == NULL) + w = Py_None; + Py_INCREF(w); + PyTuple_SET_ITEM(v, 2, w); + PyTuple_SET_ITEM(v, 3, PyInt_FromLong(tagtableentry->jne)); + PyTuple_SET_ITEM(v, 4, PyInt_FromLong(tagtableentry->je)); + if (PyErr_Occurred()) { + Py_DECREF(v); + goto onError; + } + PyTuple_SET_ITEM(tuple, i, v); + } + + return tuple; + + onError: + Py_XDECREF(tuple); + return NULL; +} + + +/* methods */ + +Py_C_Function( mxTagTable_compiled, + ".compiled()\n\n" + ) +{ + Py_NoArgsCheck(); + return mxTagTable_CompiledDefinition(self); + + onError: + return NULL; +} + +#ifdef COPY_PROTOCOL +Py_C_Function( mxTagTable_copy, + "copy([memo])\n\n" + "Return a new reference for the instance. This function\n" + "is used for the copy-protocol. 
Real copying doesn't take\n" + "place, since the instances are immutable.") +{ + Py_Assert(PySequence_Check(args),PyExc_TypeError,"Arguments are not a tuple?"); + PyObject *memo; + + Py_GetArg("|O",memo); + Py_INCREF(tagtable); + return (PyObject *)tagtable; + + onError: + return NULL; +} +#endif + +#undef tagtable + +/* --- slots --- */ + +static +PyObject *mxTagTable_Repr(mxTagTableObject *self) +{ + char t[100]; + + if (self->tabletype == MXTAGTABLE_STRINGTYPE) + sprintf(t,"", (long)self); + else if (self->tabletype == MXTAGTABLE_UNICODETYPE) + sprintf(t,"", (long)self); + else + sprintf(t,"", (long)self); + return PyString_FromString(t); +} + +static +PyObject *mxTagTable_GetAttr(mxTagTableObject *self, + char *name) +{ + PyObject *v; + + if (Py_WantAttr(name,"definition")) { + v = self->definition; + if (v == NULL) + v = Py_None; + Py_INCREF(v); + return v; + } + else if (Py_WantAttr(name,"__members__")) + return Py_BuildValue("[s]", + "definition"); + + return Py_FindMethod(mxTagTable_Methods, (PyObject *)self, (char *)name); +} + +/* Python Type Tables */ + +PyTypeObject mxTagTable_Type = { + PyObject_HEAD_INIT(0) /* init at startup ! */ + 0, /* ob_size */ + "Tag Table", /* tp_name */ + sizeof(mxTagTableObject), /* tp_basicsize */ + sizeof(mxTagTableEntry), /* tp_itemsize */ + /* methods */ + (destructor)mxTagTable_Free, /* tp_dealloc */ + (printfunc)0, /* tp_print */ + (getattrfunc)mxTagTable_GetAttr, /* tp_getattr */ + (setattrfunc)0, /* tp_setattr */ + (cmpfunc)0, /* tp_compare */ + (reprfunc)mxTagTable_Repr, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + (hashfunc)0, /* tp_hash */ + (ternaryfunc)0, /* tp_call */ + (reprfunc)0, /* tp_str */ + (getattrofunc)0, /* tp_getattro */ + (setattrofunc)0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + (char*) 0, /* tp_doc */ +}; + +/* Python Method Table */ + +statichere +PyMethodDef mxTagTable_Methods[] = +{ + Py_MethodListEntryNoArgs("compiled",mxTagTable_compiled), +#ifdef COPY_PROTOCOL + Py_MethodListEntry("__deepcopy__",mxTagTable_copy), + Py_MethodListEntry("__copy__",mxTagTable_copy), +#endif + {NULL,NULL} /* end of list */ +}; + +/* --- Internal functions ----------------------------------------------*/ + +#ifdef HAVE_UNICODE + +/* Same as mxTextTools_Join() for Unicode objects. 
*/ + +static +PyObject *mxTextTools_UnicodeJoin(PyObject *seq, + Py_ssize_t start, + Py_ssize_t stop, + PyObject *separator) +{ + PyObject *newstring = 0, *tempstr = 0; + Py_ssize_t newstring_len,current_len = 0; + Py_UNICODE *p; + Py_ssize_t i; + Py_UNICODE *sep; + Py_ssize_t sep_len; + + if (separator) { + separator = PyUnicode_FromObject(separator); + if (separator == NULL) + goto onError; + sep = PyUnicode_AS_UNICODE(separator); + sep_len = PyUnicode_GET_SIZE(separator); + } + else { + sep = NULL; + sep_len = 0; + } + + /* Create an empty new string */ + newstring_len = (10 + sep_len) * (stop - start); + newstring = PyUnicode_FromUnicode(NULL, newstring_len); + if (newstring == NULL) + goto onError; + p = PyUnicode_AS_UNICODE(newstring); + + /* Join with separator */ + for (i = start; i < stop; i++) { + register PyObject *o; + Py_UNICODE *st; + Py_ssize_t len_st; + + o = PySequence_GetItem(seq, i); + + if PyTuple_Check(o) { + /* Tuple entry: (string,l,r,[...]) */ + register Py_ssize_t l,r; + + /* parse tuple */ + Py_Assert((PyTuple_GET_SIZE(o) >= 3) && + PyInt_Check(PyTuple_GET_ITEM(o,1)) && + PyInt_Check(PyTuple_GET_ITEM(o,2)), + PyExc_TypeError, + "tuples must be of the format (string,l,r[,...])"); + tempstr = PyUnicode_FromObject(PyTuple_GET_ITEM(o,0)); + if (tempstr == NULL) + goto onError; + st = PyUnicode_AS_UNICODE(tempstr); + len_st = PyUnicode_GET_SIZE(tempstr); + l = PyInt_AS_LONG(PyTuple_GET_ITEM(o,1)); + r = PyInt_AS_LONG(PyTuple_GET_ITEM(o,2)); + + /* compute slice */ + if (r > len_st) r = len_st; + else if (r < 0) { + r += len_st + 1; + if (r < 0) + r = 0; + } + if (l > len_st) l = len_st; + else if (l < 0) { + l += len_st + 1; + if (l < 0) + l = 0; + } + + /* empty ? */ + if (l > r) + continue; + len_st = r - l; + if (len_st == 0) + continue; + + /* get pointer right */ + st += l; + } + else { + /* Must be a string entry: take the whole string */ + tempstr = PyUnicode_FromObject(o); + if (tempstr == NULL) + goto onError; + st = PyUnicode_AS_UNICODE(tempstr); + len_st = PyUnicode_GET_SIZE(tempstr); + } + + Py_DECREF(o); + + /* Resize the new string if needed */ + while (current_len + len_st + sep_len >= newstring_len) { + newstring_len += newstring_len >> 1; + if (PyUnicode_Resize(&newstring, newstring_len)) + goto onError; + p = PyUnicode_AS_UNICODE(newstring) + current_len; + } + + /* Insert separator */ + if (i > 0 && sep_len > 0) { + Py_UNICODE_COPY(p, sep, sep_len); + p += sep_len; + current_len += sep_len; + } + + /* Copy snippet into new string */ + Py_UNICODE_COPY(p, st, len_st); + p += len_st; + current_len += len_st; + + Py_DECREF(tempstr); + tempstr = NULL; + } + + /* Resize new string to the actual length */ + if (PyUnicode_Resize(&newstring, current_len)) + goto onError; + + Py_XDECREF(separator); + return newstring; + + onError: + Py_XDECREF(newstring); + Py_XDECREF(separator); + Py_XDECREF(tempstr); + return NULL; +} + +#endif + +/* Enhanced string join: also excepts tuple (text, left, right,...) + entries which then cause text[left:right] to be used as string + snippet. + + separator may be NULL; in that case, "" is used as separator. 
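+
+   Python-level sketch (assuming the module-level join() wrapper around
+   this function):
+
+       join(['abc', ('0123456789', 2, 5), 'xyz'], '-')
+       # -> 'abc-234-xyz'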
+ +*/ + +static +PyObject *mxTextTools_Join(PyObject *seq, + Py_ssize_t start, + Py_ssize_t stop, + PyObject *separator) +{ + PyObject *newstring = 0; + Py_ssize_t newstring_len, current_len = 0; + char *p; + Py_ssize_t i; + char *sep; + Py_ssize_t sep_len; + + if (separator) { +#ifdef HAVE_UNICODE + if (PyUnicode_Check(separator)) + return mxTextTools_UnicodeJoin(seq, start, stop, separator); +#endif + Py_Assert(PyString_Check(separator), + PyExc_TypeError, + "separator must be a string"); + sep = PyString_AS_STRING(separator); + sep_len = PyString_GET_SIZE(separator); + } + else { + sep = NULL; + sep_len = 0; + } + + /* Create an empty new string */ + newstring_len = (10 + sep_len) * (stop - start); + newstring = PyString_FromStringAndSize((char*)NULL, newstring_len); + if (newstring == NULL) + goto onError; + p = PyString_AS_STRING(newstring); + + /* Join with separator */ + for (i = start; i < stop; i++) { + register PyObject *o; + char *st; + Py_ssize_t len_st; + + o = PySequence_GetItem(seq, i); + + if PyTuple_Check(o) { + /* Tuple entry: (string,l,r,[...]) */ + register Py_ssize_t l,r; + + /* parse tuple */ + Py_Assert((PyTuple_GET_SIZE(o) >= 3) && + PyInt_Check(PyTuple_GET_ITEM(o,1)) && + PyInt_Check(PyTuple_GET_ITEM(o,2)), + PyExc_TypeError, + "tuples must be of the format (string,int,int[,...])"); +#ifdef HAVE_UNICODE + if (PyUnicode_Check(PyTuple_GET_ITEM(o,0))) { + /* Redirect to Unicode implementation; all previous work + is lost. */ + Py_DECREF(o); + Py_DECREF(newstring); + return mxTextTools_UnicodeJoin(seq, start, stop, separator); + } +#endif + Py_Assert(PyString_Check(PyTuple_GET_ITEM(o,0)), + PyExc_TypeError, + "tuples must be of the format (string,int,int[,...])"); + st = PyString_AS_STRING(PyTuple_GET_ITEM(o,0)); + len_st = PyString_GET_SIZE(PyTuple_GET_ITEM(o,0)); + l = PyInt_AS_LONG(PyTuple_GET_ITEM(o,1)); + r = PyInt_AS_LONG(PyTuple_GET_ITEM(o,2)); + + /* compute slice */ + if (r > len_st) r = len_st; + else if (r < 0) { + r += len_st + 1; + if (r < 0) + r = 0; + } + if (l > len_st) l = len_st; + else if (l < 0) { + l += len_st + 1; + if (l < 0) + l = 0; + } + + /* empty ? */ + if (l > r) + continue; + len_st = r - l; + if (len_st == 0) + continue; + + /* get pointer right */ + st += l; + } + else if (PyString_Check(o)) { + /* String entry: take the whole string */ + st = PyString_AS_STRING(o); + len_st = PyString_GET_SIZE(o); + } +#ifdef HAVE_UNICODE + else if (PyUnicode_Check(o)) { + /* Redirect to Unicode implementation; all previous work + is lost. 
*/ + Py_DECREF(o); + Py_DECREF(newstring); + return mxTextTools_UnicodeJoin(seq, start, stop, separator); + } +#endif + else { + Py_DECREF(o); + Py_Error(PyExc_TypeError, + "list must contain tuples or strings as entries"); + } + + Py_DECREF(o); + + /* Resize the new string if needed */ + while (current_len + len_st + sep_len >= newstring_len) { + newstring_len += newstring_len >> 1; + if (_PyString_Resize(&newstring, newstring_len)) + goto onError; + p = PyString_AS_STRING(newstring) + current_len; + } + + /* Insert separator */ + if (i > 0 && sep_len > 0) { + memcpy(p, sep, sep_len); + p += sep_len; + current_len += sep_len; + } + + /* Copy snippet into new string */ + memcpy(p,st,len_st); + p += len_st; + current_len += len_st; + } + + /* Resize new string to the actual length */ + if (_PyString_Resize(&newstring, current_len)) + goto onError; + + return newstring; + + onError: + Py_XDECREF(newstring); + return NULL; +} + +static +PyObject *mxTextTools_HexStringFromString(char *str, + Py_ssize_t len) +{ + PyObject *w = 0; + Py_ssize_t i; + char *hex; + static const char hexdigits[] = "0123456789abcdef"; + + /* Convert to HEX */ + w = PyString_FromStringAndSize(NULL,2*len); + if (!w) + goto onError; + hex = PyString_AS_STRING(w); + for (i = 0; i < len; i ++) { + unsigned char c = (unsigned char)*str; + + *hex++ = hexdigits[c >> 4]; + *hex++ = hexdigits[c & 0x0F]; + str++; + } + return w; + + onError: + Py_XDECREF(w); + return NULL; +} + +static +PyObject *mxTextTools_StringFromHexString(char *hex, + Py_ssize_t len) +{ + PyObject *w = 0; + Py_ssize_t i; + char *str; + static const char hexdigits[] = "0123456789abcdef"; + + /* Convert to string */ + Py_Assert(len % 2 == 0, + PyExc_TypeError, + "need 2-digit hex string argument"); + len >>= 1; + w = PyString_FromStringAndSize(NULL,len); + if (!w) + goto onError; + str = PyString_AS_STRING(w); + for (i = 0; i < len; i++,str++) { + register char c; + register Py_ssize_t j; + + c = tolower(*hex++); + for (j = 0; j < (Py_ssize_t)sizeof(hexdigits); j++) + if (c == hexdigits[j]) { + *str = j << 4; + break; + } + if (j == sizeof(hexdigits)) { + DPRINTF("Failed: '%c' (%u) at %i\n",c,(unsigned int)c,i); + Py_Error(PyExc_ValueError, + "argument contains non-hex characters"); + } + + c = tolower(*hex++); + for (j = 0; j < (Py_ssize_t)sizeof(hexdigits); j++) + if (c == hexdigits[j]) { + *str += j; + break; + } + if (j == sizeof(hexdigits)) { + DPRINTF("Failed2: '%c' (%u) at %i\n",c,(unsigned int)c,i); + Py_Error(PyExc_ValueError, + "argument contains non-hex characters"); + } + } + return w; + + onError: + Py_XDECREF(w); + return NULL; +} + +static +int mxTextTools_IsASCII(PyObject *text, + Py_ssize_t left, + Py_ssize_t right) +{ + if (PyString_Check(text)) { + Py_ssize_t len; + register Py_ssize_t i; + register unsigned char *str = (unsigned char *)PyString_AS_STRING(text); + + len = PyString_GET_SIZE(text); + Py_CheckSequenceSlice(len, left, right); + for (i = left; i < right; i++) + if (str[i] >= 128) + return 0; + return 1; + } + +#ifdef HAVE_UNICODE + else if (PyUnicode_Check(text)) { + Py_ssize_t len; + register Py_ssize_t i; + register Py_UNICODE *str = PyUnicode_AS_UNICODE(text); + + len = PyUnicode_GET_SIZE(text); + Py_CheckSequenceSlice(len, left, right); + for (i = left; i < right; i++) + if (str[i] >= 128) + return 0; + return 1; + } +#endif + + else + Py_Error(PyExc_TypeError, + "need string object"); + + onError: + return -1; +} + +/* Takes a list of tuples (replacement,l,r,...) 
and produces a taglist + suitable for mxTextTools_Join() which creates a copy of + text where every slice [l:r] is replaced by the given replacement. + +*/ + +static +PyObject *mxTextTools_Joinlist(PyObject *text, + PyObject *list, + Py_ssize_t pos, + Py_ssize_t text_len) +{ + PyObject *joinlist = 0; + Py_ssize_t list_len; + Py_ssize_t i; + Py_ssize_t listitem = 0; + Py_ssize_t listsize = INITIAL_LIST_SIZE; + + if (PyString_Check(text)) { + Py_CheckStringSlice(text, pos, text_len); + } +#ifdef HAVE_UNICODE + else if (PyUnicode_Check(text)) { + Py_CheckUnicodeSlice(text, pos, text_len); + } +#endif + else + Py_Error(PyExc_TypeError, + "expected string or unicode"); + + Py_Assert(PyList_Check(list), + PyExc_TypeError, + "expected a list of tuples as second argument"); + list_len = PyList_GET_SIZE(list); + + joinlist = PyList_New(listsize); + if (joinlist == NULL) + goto onError; + + for (i = 0; i < list_len; i++) { + register PyObject *t; + register Py_ssize_t left, right; + + t = PyList_GET_ITEM(list, i); + Py_Assert(PyTuple_Check(t) && + (PyTuple_GET_SIZE(t) >= 3) && + (PyString_Check(PyTuple_GET_ITEM(t,0)) || + PyUnicode_Check(PyTuple_GET_ITEM(t,0))) && + PyInt_Check(PyTuple_GET_ITEM(t,1)) && + PyInt_Check(PyTuple_GET_ITEM(t,2)), + PyExc_TypeError, + "tuples must be of the form (string,int,int,...)"); + left = PyInt_AS_LONG(PyTuple_GET_ITEM(t,1)); + right = PyInt_AS_LONG(PyTuple_GET_ITEM(t,2)); + + Py_Assert(left >= pos, + PyExc_ValueError, + "list is not sorted ascending"); + + if (left > pos) { /* joinlist.append((text,pos,left)) */ + register PyObject *v; + register PyObject *w; + + v = PyTuple_New(3); + if (v == NULL) + goto onError; + + Py_INCREF(text); + PyTuple_SET_ITEM(v,0,text); + + w = PyInt_FromLong(pos); + if (w == NULL) + goto onError; + PyTuple_SET_ITEM(v,1,w); + + w = PyTuple_GET_ITEM(t,1); + Py_INCREF(w); + PyTuple_SET_ITEM(v,2,w); + + if (listitem < listsize) + PyList_SET_ITEM(joinlist,listitem,v); + else { + PyList_Append(joinlist,v); + Py_DECREF(v); + } + listitem++; + } + + /* joinlist.append(string) */ + if (listitem < listsize) { + register PyObject *v = PyTuple_GET_ITEM(t,0); + Py_INCREF(v); + PyList_SET_ITEM(joinlist,listitem,v); + } + else + PyList_Append(joinlist,PyTuple_GET_ITEM(t,0)); + listitem++; + + pos = right; + } + + if (pos < text_len) { /* joinlist.append((text,pos,text_len)) */ + register PyObject *v; + register PyObject *w; + + v = PyTuple_New(3); + if (v == NULL) + goto onError; + + Py_INCREF(text); + PyTuple_SET_ITEM(v,0,text); + + w = PyInt_FromLong(pos); + if (w == NULL) + goto onError; + PyTuple_SET_ITEM(v,1,w); + + w = PyInt_FromLong(text_len); + if (w == NULL) + goto onError; + PyTuple_SET_ITEM(v,2,w); + + if (listitem < listsize) + PyList_SET_ITEM(joinlist,listitem,v); + else { + PyList_Append(joinlist,v); + Py_DECREF(v); + } + listitem++; + } + + /* Resize list if necessary */ + if (listitem < listsize) + PyList_SetSlice(joinlist,listitem,listsize,(PyObject*)NULL); + + return joinlist; + + onError: + + Py_XDECREF(joinlist); + return NULL; +} + +#ifdef HAVE_UNICODE +static +PyObject *mxTextTools_UnicodeCharSplit(PyObject *text, + PyObject *separator, + Py_ssize_t start, + Py_ssize_t text_len) +{ + PyObject *list = NULL; + register Py_ssize_t x; + Py_ssize_t listitem = 0; + Py_ssize_t listsize = INITIAL_LIST_SIZE; + Py_UNICODE *tx; + Py_UNICODE sep; + + text = PyUnicode_FromObject(text); + if (text == NULL) { + separator = NULL; + goto onError; + } + separator = PyUnicode_FromObject(separator); + if (separator == NULL) + goto onError; + + 
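+    /* Python-level sketch of the behaviour implemented here (assuming
+       the module-level charsplit() wrapper; empty fields are kept):
+
+           charsplit('a,b,,c', ',')   ->   ['a', 'b', '', 'c']
+    */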
Py_CheckUnicodeSlice(text, start, text_len); + + Py_Assert(PyUnicode_GET_SIZE(separator) == 1, + PyExc_TypeError, + "separator must be a single character"); + + tx = PyUnicode_AS_UNICODE(text); + sep = *PyUnicode_AS_UNICODE(separator); + + list = PyList_New(listsize); + if (!list) + goto onError; + + x = start; + while (1) { + PyObject *s; + register Py_ssize_t z; + + /* Skip to next separator */ + z = x; + for (;x < text_len; x++) + if (tx[x] == sep) + break; + + /* Append the slice to list */ + s = PyUnicode_FromUnicode(&tx[z], x - z); + if (!s) + goto onError; + if (listitem < listsize) + PyList_SET_ITEM(list,listitem,s); + else { + PyList_Append(list,s); + Py_DECREF(s); + } + listitem++; + + if (x == text_len) + break; + + /* Skip separator */ + x++; + } + + /* Resize list if necessary */ + if (listitem < listsize) + PyList_SetSlice(list,listitem,listsize,(PyObject*)NULL); + + Py_DECREF(text); + Py_DECREF(separator); + return list; + + onError: + Py_XDECREF(list); + Py_XDECREF(text); + Py_XDECREF(separator); + return NULL; +} +#endif + +static +PyObject *mxTextTools_CharSplit(PyObject *text, + PyObject *separator, + Py_ssize_t start, + Py_ssize_t text_len) +{ + PyObject *list = 0; + register Py_ssize_t x; + Py_ssize_t listitem = 0; + Py_ssize_t listsize = INITIAL_LIST_SIZE; + char *tx; + char sep; + +#ifdef HAVE_UNICODE + if (PyUnicode_Check(text) || PyUnicode_Check(separator)) + return mxTextTools_UnicodeCharSplit(text, separator, + start, text_len); +#endif + + if (PyString_Check(text) && PyString_Check(separator)) { + Py_CheckStringSlice(text, start, text_len); + } + else + Py_Error(PyExc_TypeError, + "text and separator must be strings or unicode"); + + Py_Assert(PyString_GET_SIZE(separator) == 1, + PyExc_TypeError, + "separator must be a single character"); + + tx = PyString_AS_STRING(text); + sep = *PyString_AS_STRING(separator); + + list = PyList_New(listsize); + if (!list) + goto onError; + + x = start; + while (1) { + PyObject *s; + register Py_ssize_t z; + + /* Skip to next separator */ + z = x; + for (;x < text_len; x++) + if (tx[x] == sep) + break; + + /* Append the slice to list */ + s = PyString_FromStringAndSize(&tx[z], x - z); + if (!s) + goto onError; + if (listitem < listsize) + PyList_SET_ITEM(list,listitem,s); + else { + PyList_Append(list,s); + Py_DECREF(s); + } + listitem++; + + if (x == text_len) + break; + + /* Skip separator */ + x++; + } + + /* Resize list if necessary */ + if (listitem < listsize) + PyList_SetSlice(list,listitem,listsize,(PyObject*)NULL); + + return list; + + onError: + Py_XDECREF(list); + return NULL; +} + +#ifdef HAVE_UNICODE +static +PyObject *mxTextTools_UnicodeSplitAt(PyObject *text, + PyObject *separator, + Py_ssize_t nth, + Py_ssize_t start, + Py_ssize_t text_len) +{ + PyObject *tuple = 0; + register Py_ssize_t x; + PyObject *s; + Py_UNICODE *tx; + Py_UNICODE sep; + + text = PyUnicode_FromObject(text); + if (text == NULL) { + separator = NULL; + goto onError; + } + separator = PyUnicode_FromObject(separator); + if (separator == NULL) + goto onError; + + Py_CheckUnicodeSlice(text, start, text_len); + + Py_Assert(PyUnicode_GET_SIZE(separator) == 1, + PyExc_TypeError, + "separator must be a single character"); + + tx = PyUnicode_AS_UNICODE(text); + sep = *PyUnicode_AS_UNICODE(separator); + + tuple = PyTuple_New(2); + if (!tuple) + goto onError; + + if (nth > 0) { + /* Skip to nth separator from the left */ + x = start; + while (1) { + for (; x < text_len; x++) + if (tx[x] == sep) + break; + if (--nth == 0 || x == text_len) + break; + x++; 
+ } + } + else if (nth < 0) { + /* Skip to nth separator from the right */ + x = text_len - 1; + while (1) { + for (; x >= start; x--) + if (tx[x] == sep) + break; + if (++nth == 0 || x < start) + break; + x--; + } + } + else + Py_Error(PyExc_ValueError, + "nth must be non-zero"); + + /* Add to tuple */ + if (x < start) + s = PyUnicode_FromUnicode((Py_UNICODE *)"", 0); + else + s = PyUnicode_FromUnicode(&tx[start], x - start); + if (!s) + goto onError; + PyTuple_SET_ITEM(tuple,0,s); + + /* Skip separator */ + x++; + + if (x >= text_len) + s = PyUnicode_FromUnicode((Py_UNICODE *)"", 0); + else + s = PyUnicode_FromUnicode(&tx[x], text_len - x); + if (!s) + goto onError; + PyTuple_SET_ITEM(tuple,1,s); + + Py_DECREF(text); + Py_DECREF(separator); + return tuple; + + onError: + Py_XDECREF(tuple); + Py_XDECREF(text); + Py_XDECREF(separator); + return NULL; +} +#endif + +static +PyObject *mxTextTools_SplitAt(PyObject *text, + PyObject *separator, + Py_ssize_t nth, + Py_ssize_t start, + Py_ssize_t text_len) +{ + PyObject *tuple = 0; + register Py_ssize_t x; + PyObject *s; + char *tx; + char sep; + +#ifdef HAVE_UNICODE + if (PyUnicode_Check(text) || PyUnicode_Check(separator)) + return mxTextTools_UnicodeSplitAt(text, separator, + nth, start, text_len); +#endif + + if (PyString_Check(text) && PyString_Check(separator)) { + Py_CheckStringSlice(text, start, text_len); + } + else + Py_Error(PyExc_TypeError, + "text and separator must be strings or unicode"); + + Py_Assert(PyString_GET_SIZE(separator) == 1, + PyExc_TypeError, + "separator must be a single character"); + + tx = PyString_AS_STRING(text); + sep = *PyString_AS_STRING(separator); + + tuple = PyTuple_New(2); + if (!tuple) + goto onError; + + if (nth > 0) { + /* Skip to nth separator from the left */ + x = start; + while (1) { + for (; x < text_len; x++) + if (tx[x] == sep) + break; + if (--nth == 0 || x == text_len) + break; + x++; + } + } + else if (nth < 0) { + /* Skip to nth separator from the right */ + x = text_len - 1; + while (1) { + for (; x >= start; x--) + if (tx[x] == sep) + break; + if (++nth == 0 || x < start) + break; + x--; + } + } + else + Py_Error(PyExc_ValueError, + "nth must be non-zero"); + + /* Add to tuple */ + if (x < start) + s = PyString_FromStringAndSize("",0); + else + s = PyString_FromStringAndSize(&tx[start], x - start); + if (!s) + goto onError; + PyTuple_SET_ITEM(tuple,0,s); + + /* Skip separator */ + x++; + + if (x >= text_len) + s = PyString_FromStringAndSize("",0); + else + s = PyString_FromStringAndSize(&tx[x], text_len - x); + if (!s) + goto onError; + PyTuple_SET_ITEM(tuple,1,s); + + return tuple; + + onError: + Py_XDECREF(tuple); + return NULL; +} + +#ifdef HAVE_UNICODE +static +PyObject *mxTextTools_UnicodeSuffix(PyObject *text, + PyObject *suffixes, + Py_ssize_t start, + Py_ssize_t text_len, + PyObject *translate) +{ + Py_ssize_t i; + Py_UNICODE *tx; + + text = PyUnicode_FromObject(text); + if (text == NULL) + goto onError; + + if (PyUnicode_Check(text)) { + Py_CheckUnicodeSlice(text, start, text_len); + } + else + Py_Error(PyExc_TypeError, + "expected unicode"); + Py_Assert(PyTuple_Check(suffixes), + PyExc_TypeError, + "suffixes needs to be a tuple of unicode strings"); + + /* XXX Add support for translate... 
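+
+   (For the 8-bit string variant below, translate is a 256-character
+   mapping table that is applied to the text, not to the suffixes,
+   before comparing; a lower-casing table, for instance, makes the
+   match case-insensitive. This is a reading of the code, offered as
+   a hint only.)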
*/ + Py_Assert(translate == NULL, + PyExc_TypeError, + "translate is not supported for Unicode suffix()es"); + + tx = PyUnicode_AS_UNICODE(text); + + for (i = 0; i < PyTuple_GET_SIZE(suffixes); i++) { + PyObject *suffix = PyTuple_GET_ITEM(suffixes,i); + Py_ssize_t start_cmp; + + suffix = PyUnicode_FromObject(suffix); + if (suffix == NULL) + goto onError; + + start_cmp = text_len - PyUnicode_GET_SIZE(suffix); + if (start_cmp >= start && + PyUnicode_AS_UNICODE(suffix)[0] == tx[start_cmp] && + memcmp(PyUnicode_AS_UNICODE(suffix), + &tx[start_cmp], + PyUnicode_GET_DATA_SIZE(suffix)) == 0) { + Py_DECREF(text); + return suffix; + } + + Py_DECREF(suffix); + } + + Py_DECREF(text); + Py_ReturnNone(); + + onError: + Py_XDECREF(text); + return NULL; +} +#endif + +static +PyObject *mxTextTools_Suffix(PyObject *text, + PyObject *suffixes, + Py_ssize_t start, + Py_ssize_t text_len, + PyObject *translate) +{ + Py_ssize_t i; + char *tx; + +#ifdef HAVE_UNICODE + if (PyUnicode_Check(text)) + return mxTextTools_UnicodeSuffix(text, suffixes, + start, text_len, + translate); +#endif + + if (PyString_Check(text)) { + Py_CheckStringSlice(text, start, text_len); + } + else + Py_Error(PyExc_TypeError, + "expected string or unicode"); + Py_Assert(PyTuple_Check(suffixes), + PyExc_TypeError, + "suffixes needs to be a tuple of strings"); + tx = PyString_AS_STRING(text); + + if (translate) { + char *tr; + + Py_Assert(PyString_Check(translate) && + PyString_GET_SIZE(translate) == 256, + PyExc_TypeError, + "translate must be a string having 256 characters"); + tr = PyString_AS_STRING(translate); + + for (i = 0; i < PyTuple_GET_SIZE(suffixes); i++) { + PyObject *suffix = PyTuple_GET_ITEM(suffixes, i); + Py_ssize_t start_cmp; + register char *s; + register char *t; + register Py_ssize_t j; + + Py_AssertWithArg(PyString_Check(suffix), + PyExc_TypeError, + "tuple entry %d is not a string",(unsigned int)i); + start_cmp = text_len - PyString_GET_SIZE(suffix); + if (start_cmp < start) + continue; + + /* Do the compare using a translate table */ + s = PyString_AS_STRING(suffix); + t = tx + start_cmp; + for (j = start_cmp; j < text_len; j++, s++, t++) + if (*s != tr[(unsigned char)*t]) + break; + if (j == text_len) { + Py_INCREF(suffix); + return suffix; + } + } + } + + else + for (i = 0; i < PyTuple_GET_SIZE(suffixes); i++) { + PyObject *suffix = PyTuple_GET_ITEM(suffixes,i); + Py_ssize_t start_cmp; + + Py_AssertWithArg(PyString_Check(suffix), + PyExc_TypeError, + "tuple entry %d is not a string",(unsigned int)i); + start_cmp = text_len - PyString_GET_SIZE(suffix); + if (start_cmp < start) + continue; + + /* Compare without translate table */ + if (PyString_AS_STRING(suffix)[0] == tx[start_cmp] + && + strncmp(PyString_AS_STRING(suffix), + &tx[start_cmp], + PyString_GET_SIZE(suffix)) == 0) { + Py_INCREF(suffix); + return suffix; + } + } + + Py_ReturnNone(); + + onError: + return NULL; +} + +#ifdef HAVE_UNICODE +static +PyObject *mxTextTools_UnicodePrefix(PyObject *text, + PyObject *prefixes, + Py_ssize_t start, + Py_ssize_t text_len, + PyObject *translate) +{ + Py_ssize_t i; + Py_UNICODE *tx; + + text = PyUnicode_FromObject(text); + if (text == NULL) + goto onError; + + if (PyUnicode_Check(text)) { + Py_CheckUnicodeSlice(text, start, text_len); + } + else + Py_Error(PyExc_TypeError, + "expected unicode"); + Py_Assert(PyTuple_Check(prefixes), + PyExc_TypeError, + "prefixes needs to be a tuple of unicode strings"); + + /* XXX Add support for translate... 
*/ + Py_Assert(translate == NULL, + PyExc_TypeError, + "translate is not supported for Unicode prefix()es"); + + tx = PyUnicode_AS_UNICODE(text); + + for (i = 0; i < PyTuple_GET_SIZE(prefixes); i++) { + PyObject *prefix = PyTuple_GET_ITEM(prefixes,i); + + prefix = PyUnicode_FromObject(prefix); + if (prefix == NULL) + goto onError; + + /* Compare without translate table */ + if (start + PyString_GET_SIZE(prefix) <= text_len && + PyUnicode_AS_UNICODE(prefix)[0] == tx[start] && + memcmp(PyUnicode_AS_UNICODE(prefix), + &tx[start], + PyUnicode_GET_DATA_SIZE(prefix)) == 0) { + Py_INCREF(prefix); + return prefix; + } + + Py_DECREF(prefix); + } + + Py_DECREF(text); + Py_ReturnNone(); + + onError: + Py_XDECREF(text); + return NULL; +} +#endif + +static +PyObject *mxTextTools_Prefix(PyObject *text, + PyObject *prefixes, + Py_ssize_t start, + Py_ssize_t text_len, + PyObject *translate) +{ + Py_ssize_t i; + char *tx; + +#ifdef HAVE_UNICODE + if (PyUnicode_Check(text)) + return mxTextTools_UnicodePrefix(text, prefixes, + start, text_len, + translate); +#endif + + if (PyString_Check(text)) { + Py_CheckStringSlice(text, start, text_len); + } + else + Py_Error(PyExc_TypeError, + "expected string or unicode"); + Py_Assert(PyTuple_Check(prefixes), + PyExc_TypeError, + "prefixes needs to be a tuple of strings"); + tx = PyString_AS_STRING(text); + + if (translate) { + char *tr; + + Py_Assert(PyString_Check(translate) && + PyString_GET_SIZE(translate) == 256, + PyExc_TypeError, + "translate must be a string having 256 characters"); + tr = PyString_AS_STRING(translate); + + for (i = 0; i < PyTuple_GET_SIZE(prefixes); i++) { + PyObject *prefix = PyTuple_GET_ITEM(prefixes,i); + Py_ssize_t cmp_len; + register char *s; + register char *t; + register Py_ssize_t j; + + Py_AssertWithArg(PyString_Check(prefix), + PyExc_TypeError, + "tuple entry %d is not a string",(unsigned int)i); + cmp_len = PyString_GET_SIZE(prefix); + if (start + cmp_len > text_len) + continue; + + /* Do the compare using a translate table */ + s = PyString_AS_STRING(prefix); + t = tx + start; + for (j = 0; j < cmp_len; j++, s++, t++) + if (*s != tr[(unsigned char)*t]) + break; + if (j == cmp_len) { + Py_INCREF(prefix); + return prefix; + } + } + } + + else + for (i = 0; i < PyTuple_GET_SIZE(prefixes); i++) { + PyObject *prefix = PyTuple_GET_ITEM(prefixes,i); + + Py_AssertWithArg(PyString_Check(prefix), + PyExc_TypeError, + "tuple entry %d is not a string",(unsigned int)i); + if (start + PyString_GET_SIZE(prefix) > text_len) + continue; + + /* Compare without translate table */ + if (PyString_AS_STRING(prefix)[0] == tx[start] && + strncmp(PyString_AS_STRING(prefix), + &tx[start], + PyString_GET_SIZE(prefix)) == 0) { + Py_INCREF(prefix); + return prefix; + } + } + + Py_ReturnNone(); + + onError: + return NULL; +} + +/* Stips off characters appearing in the character set from text[start:stop] + and returns the result as Python string object. 
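+
+   Python-level sketch (assuming the module-level set() and setstrip()
+   helpers; with the default mode both ends are stripped):
+
+       ws = set(' \t\r\n')
+       setstrip('  hello  ', ws)    # -> 'hello'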
+ + where indicates the mode: + where < 0: strip left only + where = 0: strip left and right + where > 0: strip right only + +*/ +static +PyObject *mxTextTools_SetStrip(char *tx, + Py_ssize_t tx_len, + char *setstr, + Py_ssize_t setstr_len, + Py_ssize_t start, + Py_ssize_t stop, + Py_ssize_t where) +{ + Py_ssize_t left, right; + + Py_Assert(setstr_len == 32, + PyExc_TypeError, + "separator needs to be a set as obtained from set()"); + Py_CheckBufferSlice(tx_len, start, stop); + + /* Strip left */ + if (where <= 0) { + register Py_ssize_t x; + for (x = start; x < stop; x++) + if (!Py_CharInSet(tx[x], setstr)) + break; + left = x; + } + else + left = start; + + /* Strip right */ + if (where >= 0) { + register Py_ssize_t x; + for (x = stop - 1; x >= start; x--) + if (!Py_CharInSet(tx[x], setstr)) + break; + right = x + 1; + } + else + right = stop; + + return PyString_FromStringAndSize(tx + left, max(right - left, 0)); + + onError: + return NULL; +} + +static +PyObject *mxTextTools_SetSplit(char *tx, + Py_ssize_t tx_len, + char *setstr, + Py_ssize_t setstr_len, + Py_ssize_t start, + Py_ssize_t text_len) +{ + PyObject *list = NULL; + register Py_ssize_t x; + Py_ssize_t listitem = 0; + Py_ssize_t listsize = INITIAL_LIST_SIZE; + + Py_Assert(setstr_len == 32, + PyExc_TypeError, + "separator needs to be a set as obtained from set()"); + Py_CheckBufferSlice(tx_len,start,text_len); + + list = PyList_New(listsize); + if (!list) + goto onError; + + x = start; + while (x < text_len) { + Py_ssize_t z; + + /* Skip all text in set */ + for (;x < text_len; x++) { + register Py_ssize_t c = (unsigned char)tx[x]; + register Py_ssize_t block = (unsigned char)setstr[c >> 3]; + if (!block || ((block & (1 << (c & 7))) == 0)) + break; + } + + /* Skip all text not in set */ + z = x; + for (;x < text_len; x++) { + register Py_ssize_t c = (unsigned char)tx[x]; + register Py_ssize_t block = (unsigned char)setstr[c >> 3]; + if (block && ((block & (1 << (c & 7))) != 0)) + break; + } + + /* Append the slice to list if it is not empty */ + if (x > z) { + PyObject *s; + s = PyString_FromStringAndSize((char *)&tx[z], x - z); + if (!s) + goto onError; + if (listitem < listsize) + PyList_SET_ITEM(list,listitem,s); + else { + PyList_Append(list,s); + Py_DECREF(s); + } + listitem++; + } + } + + /* Resize list if necessary */ + if (listitem < listsize) + PyList_SetSlice(list,listitem,listsize,(PyObject*)NULL); + + return list; + + onError: + Py_XDECREF(list); + return NULL; +} + +static +PyObject *mxTextTools_SetSplitX(char *tx, + Py_ssize_t tx_len, + char *setstr, + Py_ssize_t setstr_len, + Py_ssize_t start, + Py_ssize_t text_len) +{ + PyObject *list = NULL; + register Py_ssize_t x; + Py_ssize_t listitem = 0; + Py_ssize_t listsize = INITIAL_LIST_SIZE; + + Py_Assert(setstr_len == 32, + PyExc_TypeError, + "separator needs to be a set as obtained from set()"); + Py_CheckBufferSlice(tx_len,start,text_len); + + list = PyList_New(listsize); + if (!list) + goto onError; + + x = start; + while (x < text_len) { + PyObject *s; + register Py_ssize_t z; + + /* Skip all text not in set */ + z = x; + for (;x < text_len; x++) { + register unsigned int c = (unsigned char)tx[x]; + register unsigned int block = (unsigned char)setstr[c >> 3]; + if (block && ((block & (1 << (c & 7))) != 0)) + break; + } + + /* Append the slice to list */ + s = PyString_FromStringAndSize((char *)&tx[z], x - z); + if (!s) + goto onError; + if (listitem < listsize) + PyList_SET_ITEM(list,listitem,s); + else { + PyList_Append(list,s); + Py_DECREF(s); + } + listitem++; 
+ + if (x >= text_len) + break; + + /* Skip all text in set */ + z = x; + for (;x < text_len; x++) { + register unsigned int c = (unsigned char)tx[x]; + register unsigned int block = (unsigned char)setstr[c >> 3]; + if (!block || ((block & (1 << (c & 7))) == 0)) + break; + } + + /* Append the slice to list if it is not empty */ + s = PyString_FromStringAndSize((char *)&tx[z], x - z); + if (!s) + goto onError; + if (listitem < listsize) + PyList_SET_ITEM(list,listitem,s); + else { + PyList_Append(list,s); + Py_DECREF(s); + } + listitem++; + } + + /* Resize list if necessary */ + if (listitem < listsize) + PyList_SetSlice(list,listitem,listsize,(PyObject*)NULL); + + return list; + + onError: + Py_XDECREF(list); + return NULL; +} + +static +PyObject *mxTextTools_Upper(PyObject *text) +{ + PyObject *ntext; + register unsigned char *s; + register unsigned char *orig; + register Py_ssize_t i; + unsigned char *tr; + Py_ssize_t len; + + Py_Assert(PyString_Check(text), + PyExc_TypeError, + "expected a Python string"); + + len = PyString_GET_SIZE(text); + ntext = PyString_FromStringAndSize(NULL,len); + if (!ntext) + goto onError; + + /* Translate */ + tr = (unsigned char *)PyString_AS_STRING(mx_ToUpper); + orig = (unsigned char *)PyString_AS_STRING(text); + s = (unsigned char *)PyString_AS_STRING(ntext); + for (i = 0; i < len; i++, s++, orig++) + *s = tr[*orig]; + + return ntext; + + onError: + return NULL; +} + +#ifdef HAVE_UNICODE +static +PyObject *mxTextTools_UnicodeUpper(PyObject *text) +{ + PyObject *ntext; + register Py_UNICODE *s; + register Py_UNICODE *orig; + register Py_ssize_t i; + Py_ssize_t len; + + text = PyUnicode_FromObject(text); + if (text == NULL) + goto onError; + + len = PyUnicode_GET_SIZE(text); + ntext = PyUnicode_FromUnicode(NULL, len); + if (!ntext) + goto onError; + + /* Translate */ + orig = (Py_UNICODE *)PyUnicode_AS_UNICODE(text); + s = (Py_UNICODE *)PyUnicode_AS_UNICODE(ntext); + for (i = 0; i < len; i++, s++, orig++) + *s = Py_UNICODE_TOUPPER(*orig); + + Py_DECREF(text); + return ntext; + + onError: + Py_XDECREF(text); + return NULL; +} +#endif + +static +PyObject *mxTextTools_Lower(PyObject *text) +{ + PyObject *ntext; + register unsigned char *s; + register unsigned char *orig; + register Py_ssize_t i; + unsigned char *tr; + Py_ssize_t len; + + Py_Assert(PyString_Check(text), + PyExc_TypeError, + "expected a Python string"); + + len = PyString_GET_SIZE(text); + ntext = PyString_FromStringAndSize(NULL,len); + if (!ntext) + goto onError; + + /* Translate */ + tr = (unsigned char *)PyString_AS_STRING(mx_ToLower); + orig = (unsigned char *)PyString_AS_STRING(text); + s = (unsigned char *)PyString_AS_STRING(ntext); + for (i = 0; i < len; i++, s++, orig++) + *s = tr[*orig]; + + return ntext; + + onError: + return NULL; +} + +#ifdef HAVE_UNICODE +static +PyObject *mxTextTools_UnicodeLower(PyObject *text) +{ + PyObject *ntext; + register Py_UNICODE *s; + register Py_UNICODE *orig; + register Py_ssize_t i; + Py_ssize_t len; + + text = PyUnicode_FromObject(text); + if (text == NULL) + goto onError; + + len = PyUnicode_GET_SIZE(text); + ntext = PyUnicode_FromUnicode(NULL, len); + if (!ntext) + goto onError; + + /* Translate */ + orig = (Py_UNICODE *)PyUnicode_AS_UNICODE(text); + s = (Py_UNICODE *)PyUnicode_AS_UNICODE(ntext); + for (i = 0; i < len; i++, s++, orig++) + *s = Py_UNICODE_TOLOWER(*orig); + + Py_DECREF(text); + return ntext; + + onError: + Py_XDECREF(text); + return NULL; +} +#endif + +/* --- Module functions ------------------------------------------------*/ + +/* 
Interface to the tagging engine in mxte.c */ + +Py_C_Function_WithKeywords( + mxTextTools_tag, + "tag(text,tagtable,sliceleft=0,sliceright=len(text),taglist=[],context=None) \n""" + "Produce a tag list for a string, given a tag-table\n" + "- returns a tuple (success, taglist, nextindex)\n" + "- if taglist == None, then no taglist is created" + ) +{ + PyObject *text; + PyObject *tagtable; + Py_ssize_t sliceright = INT_MAX; + Py_ssize_t sliceleft = 0; + PyObject *taglist = 0; + Py_ssize_t taglist_len; + PyObject *context = 0; + Py_ssize_t next, result; + PyObject *res; + + Py_KeywordsGet6Args("OO|iiOO:tag", + text,tagtable,sliceleft,sliceright,taglist,context); + + if (taglist == NULL) { + /* not given, so use default: an empty list */ + taglist = PyList_New(0); + if (taglist == NULL) + goto onError; + taglist_len = 0; + } + else { + Py_INCREF(taglist); + Py_Assert(PyList_Check(taglist) || taglist == Py_None, + PyExc_TypeError, + "taglist must be a list or None"); + if (taglist != Py_None) { + taglist_len = PyList_Size(taglist); + if (taglist_len < 0) + goto onError; + } + else + taglist_len = 0; + } + + Py_Assert(mxTagTable_Check(tagtable) || + PyTuple_Check(tagtable) || + PyList_Check(tagtable), + PyExc_TypeError, + "tagtable must be a TagTable instance, list or tuple"); + + /* Prepare the argument for the Tagging Engine and let it process + the request */ + if (PyString_Check(text)) { + + Py_CheckStringSlice(text, sliceleft, sliceright); + + if (!mxTagTable_Check(tagtable)) { + tagtable = mxTagTable_New(tagtable, MXTAGTABLE_STRINGTYPE, 1); + if (tagtable == NULL) + goto onError; + } + else if (mxTagTable_Type(tagtable) != MXTAGTABLE_STRINGTYPE) { + Py_Error(PyExc_TypeError, + "TagTable instance is not intended for parsing strings"); + } + else + Py_INCREF(tagtable); + + /* Call the Tagging Engine */ + result = mxTextTools_TaggingEngine(text, + sliceleft, + sliceright, + (mxTagTableObject *)tagtable, + taglist, + context, + &next); + Py_DECREF(tagtable); + + } +#ifdef HAVE_UNICODE + else if (PyUnicode_Check(text)) { + + Py_CheckUnicodeSlice(text, sliceleft, sliceright); + + if (!mxTagTable_Check(tagtable)) { + tagtable = mxTagTable_New(tagtable, 1, 1); + if (tagtable == NULL) + goto onError; + } + else if (mxTagTable_Type(tagtable) != MXTAGTABLE_UNICODETYPE) { + Py_Error(PyExc_TypeError, + "TagTable instance is not intended for parsing Unicode"); + } + else + Py_INCREF(tagtable); + + /* Call the Tagging Engine */ + result = mxTextTools_UnicodeTaggingEngine(text, + sliceleft, + sliceright, + (mxTagTableObject *)tagtable, + taglist, + context, + &next); + Py_DECREF(tagtable); + + } +#endif + else + Py_Error(PyExc_TypeError, + "text must be a string or unicode"); + + /* Check for exceptions during matching */ + if (result == 0) + goto onError; + + /* Undo changes to taglist in case of a match failure (result == 1) */ + if (result == 1 && taglist != Py_None) { + DPRINTF(" undoing changes: del taglist[%i:%i]\n", + taglist_len, PyList_Size(taglist)); + if (PyList_SetSlice(taglist, + taglist_len, + PyList_Size(taglist), + NULL)) + goto onError; + } + + /* Convert result to the documented external values: + 0 - no match, 1 - match. 
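     As an illustration of that external convention, a minimal Python-level
     sketch (assuming the module is importable as
     simpleparse.stt.TextTools.TextTools; the table and values below are
     illustrative only):

         from simpleparse.stt.TextTools.TextTools import tag, AllIn

         digits_table = (("digits", AllIn, "0123456789"),)
         success, taglist, next_pos = tag("123abc", digits_table)
         # success == 1, taglist == [('digits', 0, 3, None)], next_pos == 3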
*/ + result--; + + /* Build result tuple */ + res = PyTuple_New(3); + if (!res) + goto onError; + PyTuple_SET_ITEM(res,0,PyInt_FromLong(result)); + PyTuple_SET_ITEM(res,1,taglist); + PyTuple_SET_ITEM(res,2,PyInt_FromLong(next)); + return res; + + onError: + if (!PyErr_Occurred()) + Py_Error(PyExc_SystemError, + "NULL result without error in builtin tag()"); + Py_XDECREF(taglist); + return NULL; +} + +/* An extended version of string.join() for taglists: */ + +Py_C_Function( mxTextTools_join, + "join(joinlist,sep='',start=0,stop=len(joinlist))\n\n" + "Copy snippets from different strings together producing a\n" + "new string\n" + "The first argument must be a list of tuples or strings;\n" + "tuples must be of the form (string,l,r[,...]) and turn out\n" + "as string[l:r]\n" + "NOTE: the syntax used for negative slices is different\n" + "than the Python standard: -1 corresponds to the first\n" + "character *after* the string, e.g. ('Example',0,-1) gives\n" + "'Example' and not 'Exampl', like in Python\n" + "sep is an optional separator string, start and stop\n" + "define the slice of joinlist that is taken into accont." + ) +{ + Py_Assert(PySequence_Check(args),PyExc_TypeError,"Arguments are not a tuple?"); + PyObject *joinlist = NULL; + Py_ssize_t joinlist_len; + PyObject *separator = NULL; + Py_ssize_t start=0, stop=INT_MAX; + + Py_Get4Args("O|Oii:join", + joinlist,separator,start,stop); + + Py_Assert(PySequence_Check(joinlist), + PyExc_TypeError, + "first argument needs to be a sequence"); + + joinlist_len = PySequence_Length(joinlist); + Py_Assert(joinlist_len >= 0, + PyExc_TypeError, + "first argument needs to have a __len__ method"); + + Py_CheckSequenceSlice(joinlist_len, start, stop); + + /* Short-cut */ + if ((stop - start) <= 0) { + return PyString_FromString(""); + } + + return mxTextTools_Join(joinlist, + start, stop, + separator); + + onError: + return NULL; +} + +/* + Special compare function for taglist-tuples, comparing + the text-slices given: + - slices starting at a smaller index come first + - for slices starting at the same index, the longer one + wins +*/ + +Py_C_Function( mxTextTools_cmp, + "cmp(a,b)\n\n" + "Compare two valid taglist tuples w/r to their slice\n" + "position; this is useful for sorting joinlists.") +{ + Py_Assert(PySequence_Check(args),PyExc_TypeError,"Arguments are not a tuple?"); + PyObject *v,*w; + int cmp; + + Py_Get2Args("OO:cmp",v,w); + + Py_Assert(PyTuple_Check(v) && PyTuple_Check(w) && + PyTuple_GET_SIZE(v) >= 3 && PyTuple_GET_SIZE(w) >= 3, + PyExc_TypeError, + "invalid taglist-tuple"); + + cmp = PyObject_Compare(PyTuple_GET_ITEM(v,1),PyTuple_GET_ITEM(w,1)); + if (cmp != 0) + return PyInt_FromLong(cmp); + cmp = - PyObject_Compare(PyTuple_GET_ITEM(v,2),PyTuple_GET_ITEM(w,2)); + return PyInt_FromLong(cmp); + + onError: + return NULL; +} + +Py_C_Function( mxTextTools_joinlist, + "joinlist(text,list,start=0,stop=len(text))\n\n" + "Takes a list of tuples (replacement,l,r,...) and produces\n" + "a taglist suitable for join() which creates a copy\n" + "of text where every slice [l:r] is replaced by the\n" + "given replacement\n" + "- the list must be sorted using cmp() as compare function\n" + "- it may not contain overlapping slices\n" + "- the slices may not contain negative indices\n" + "- if the taglist cannot contain overlapping slices, you can\n" + " give this function the taglist produced by tag() directly\n" + " (sorting is not needed, as the list will already be sorted)\n" + "- start and stop set the slice to work in, i.e. 
text[start:stop]" +) +{ + Py_Assert(PySequence_Check(args),PyExc_TypeError,"Arguments are not a tuple?"); + PyObject *list; + PyObject *text; + Py_ssize_t text_len = INT_MAX; + Py_ssize_t pos = 0; + + Py_Get4Args("OO|ii:joinlist",text,list,pos,text_len); + + return mxTextTools_Joinlist(text, list, pos, text_len); + + onError: + return NULL; +} + +Py_C_Function( mxTextTools_charsplit, + "charsplit(text,char,start=0,stop=len(text))\n\n" + "Split text[start:stop] into substrings at char and\n" + "return the result as list of strings." +) +{ + Py_Assert(PySequence_Check(args),PyExc_TypeError,"Arguments are not a tuple?"); + PyObject *text, *separator; + Py_ssize_t text_len = INT_MAX; + Py_ssize_t start = 0; + + Py_Get4Args("OO|ii:charsplit", + text,separator,start,text_len); + + return mxTextTools_CharSplit(text, separator, + start, text_len); + + onError: + return NULL; +} + +Py_C_Function( mxTextTools_splitat, + "splitat(text,char,nth=1,start=0,stop=len(text))\n\n" + "Split text[start:stop] into two substrings at the nth\n" + "occurance of char and return the result as 2-tuple. If the\n" + "character is not found, the second string is empty. nth may\n" + "be negative: the search is then done from the right and the\n" + "first string is empty in case the character is not found." +) +{ + Py_Assert(PySequence_Check(args),PyExc_TypeError,"Arguments are not a tuple?"); + PyObject *text, *separator; + Py_ssize_t text_len = INT_MAX; + Py_ssize_t start = 0; + Py_ssize_t nth = 1; + + Py_Get5Args("OO|iii:splitat", + text,separator,nth,start,text_len); + + return mxTextTools_SplitAt(text, separator, + nth, start, text_len); + onError: + return NULL; +} + +Py_C_Function( mxTextTools_suffix, + "suffix(text,suffixes,start=0,stop=len(text)[,translate])\n\n" + "Looks at text[start:stop] and returns the first matching\n" + "suffix out of the tuple of strings given in suffixes.\n" + "If no suffix is found to be matching, None is returned.\n" + "The optional 256 char translate string is used to translate\n" + "the text prior to comparing it with the given suffixes." + ) +{ + Py_Assert(PySequence_Check(args),PyExc_TypeError,"Arguments are not a tuple?"); + PyObject *text, *suffixes, *translate = NULL; + Py_ssize_t text_len = INT_MAX; + Py_ssize_t start = 0; + + Py_Get5Args("OO|iiO:suffix", + text,suffixes,start,text_len,translate); + + return mxTextTools_Suffix(text, + suffixes, + start, text_len, + translate); + onError: + return NULL; +} + +Py_C_Function( mxTextTools_prefix, + "prefix(text,prefixes,start=0,stop=len(text)[,translate])\n\n" + "Looks at text[start:stop] and returns the first matching\n" + "prefix out of the tuple of strings given in prefixes.\n" + "If no prefix is found to be matching, None is returned.\n" + "The optional 256 char translate string is used to translate\n" + "the text prior to comparing it with the given suffixes." 
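A rough Python-level sketch of these slice-based helpers (assuming the module
is importable as simpleparse.stt.TextTools.TextTools; the values shown follow
the docstrings above and are illustrative):

    from simpleparse.stt.TextTools.TextTools import charsplit, splitat, prefix, suffix

    charsplit("a,b,,c", ",")                        # -> ['a', 'b', '', 'c']
    splitat("key=value=x", "=")                     # -> ('key', 'value=x')
    prefix("simpleparse-2.2.0", ("simpleparse-",))  # -> 'simpleparse-'
    suffix("archive.tar.gz", (".zip", ".tar.gz"))   # -> '.tar.gz'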
+) +{ + Py_Assert(PySequence_Check(args),PyExc_TypeError,"Arguments are not a tuple?"); + PyObject *text, *prefixes, *translate = NULL; + Py_ssize_t text_len = INT_MAX; + Py_ssize_t start = 0; + + Py_Get5Args("OO|iiO:prefix", + text,prefixes,start,text_len,translate); + + return mxTextTools_Prefix(text, + prefixes, + start, text_len, + translate); + onError: + return NULL; +} + +Py_C_Function( mxTextTools_set, + "set(string,logic=1)\n\n" + "Returns a character set for string: a bit encoded version\n" + "of the characters occurring in string.\n" + "- logic can be set to 0 if all characters *not* in string\n" + " should go into the set") +{ + Py_Assert(PySequence_Check(args),PyExc_TypeError,"Arguments are not a tuple?"); + PyObject *sto; + char *s,*st; + Py_ssize_t len_s; + int logic = 1; + Py_ssize_t i; + + if (!PyArg_ParseTuple(args,"s#|i:set",&s,&len_s,&logic)) { + goto onError; + } + sto = PyString_FromStringAndSize(NULL,32); + if (sto == NULL) + goto onError; + + st = PyString_AS_STRING(sto); + + if (logic) { + memset(st,0x00,32); + for (i = 0; i < len_s; i++,s++) { + int j = (unsigned char)*s; + + st[j >> 3] |= 1 << (j & 7); + } + } + else { + memset(st,0xFF,32); + for (i = 0; i < len_s; i++,s++) { + int j = (unsigned char)*s; + + st[j >> 3] &= ~(1 << (j & 7)); + } + } + return sto; + + onError: + return NULL; +} + +Py_C_Function( mxTextTools_setfind, + "setfind(text,set,start=0,stop=len(text))\n\n" + "Find the first occurence of any character from set in\n" + "text[start:stop]\n set must be a string obtained with set()\n" + "DEPRECATED: use CharSet().search() instead." +) +{ + Py_Assert(PySequence_Check(args),PyExc_TypeError,"Arguments are not a tuple?"); + PyObject *text; + PyObject *set; + Py_ssize_t text_len = INT_MAX; + Py_ssize_t start = 0; + register Py_ssize_t x; + register char *tx; + register unsigned char *setstr; + + Py_Get4Args("OO|ii:setfind",text,set,start,text_len); + + Py_Assert(PyString_Check(text), + PyExc_TypeError, + "first argument needs to be a string"); + Py_Assert(PyString_Check(set) && PyString_GET_SIZE(set) == 32, + PyExc_TypeError, + "second argument needs to be a set"); + Py_CheckStringSlice(text,start,text_len); + + x = start; + tx = PyString_AS_STRING(text) + x; + setstr = (unsigned char *)PyString_AS_STRING(set); + + for (;x < text_len; tx++, x++) + if (Py_CharInSet(*tx,setstr)) + break; + + if (x == text_len) + /* Not found */ + return PyInt_FromLong(-1L); + else + return PyInt_FromLong(x); + + onError: + return NULL; +} + +Py_C_Function( mxTextTools_setstrip, + "setstrip(text,set,start=0,stop=len(text),mode=0)\n\n" + "Strip all characters in text[start:stop] appearing in set.\n" + "mode indicates where to strip (<0: left; =0: left and right;\n" + ">0: right). set must be a string obtained with set()\n" + "DEPRECATED: use CharSet().strip() instead." 
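The set()-based helpers above can be sketched from Python as follows (a
minimal illustration, assuming the module is importable as
simpleparse.stt.TextTools.TextTools; note that set() here is the module's
bit-mask constructor, not the Python builtin):

    from simpleparse.stt.TextTools.TextTools import set, setfind, setstrip, setsplit

    vowels = set("aeiou")              # 32-byte bit mask over the 256 byte values
    setfind("parser", vowels)          # -> 1    (index of the first vowel)
    setstrip("aabxyzbaa", set("ab"))   # -> 'xyz' (mode=0 strips both ends)
    setsplit("banana", vowels)         # -> ['b', 'n', 'n']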
+ ) +{ + Py_Assert(PySequence_Check(args),PyExc_TypeError,"Arguments are not a tuple?"); + char *tx; + Py_ssize_t tx_len; + char *setstr; + Py_ssize_t setstr_len; + Py_ssize_t start = 0; + Py_ssize_t stop = INT_MAX; + int mode = 0; + + Py_Get7Args("s#s#|iii:setstip", + tx,tx_len,setstr,setstr_len,start,stop,mode); + + return mxTextTools_SetStrip(tx, tx_len, + setstr, setstr_len, + start, stop, + mode); + + onError: + return NULL; +} + +Py_C_Function( mxTextTools_setsplit, + "setsplit(text,set,start=0,stop=len(text))\n\n" + "Split text[start:stop] into substrings using set,\n" + "omitting the splitting parts and empty substrings.\n" + "set must be a string obtained from set()\n" + "DEPRECATED: use CharSet().split() instead." + ) +{ + Py_Assert(PySequence_Check(args),PyExc_TypeError,"Arguments are not a tuple?"); + char *tx; + Py_ssize_t tx_len; + char *setstr; + Py_ssize_t setstr_len; + Py_ssize_t start = 0; + Py_ssize_t stop = INT_MAX; + + Py_Get6Args("s#s#|ii:setsplit", + tx,tx_len,setstr,setstr_len,start,stop); + + return mxTextTools_SetSplit(tx, tx_len, + setstr, setstr_len, + start, stop); + onError: + return NULL; +} + +Py_C_Function( mxTextTools_setsplitx, + "setsplitx(text,set,start=0,stop=len(text))\n\n" + "Split text[start:stop] into substrings using set, so\n" + "that every second entry consists only of characters in set.\n" + "set must be a string obtained with set()\n" + "DEPRECATED: use CharSet().splitx() instead." + ) +{ + Py_Assert(PySequence_Check(args),PyExc_TypeError,"Arguments are not a tuple?"); + Py_ssize_t text_len = INT_MAX; + Py_ssize_t start = 0; + char *tx; + Py_ssize_t tx_len; + char *setstr; + Py_ssize_t setstr_len; + + Py_Get6Args("s#s#|ii:setsplitx", + tx,tx_len,setstr,setstr_len,start,text_len); + + return mxTextTools_SetSplitX(tx, tx_len, + setstr, setstr_len, + start, text_len); + onError: + return NULL; +} + +Py_C_Function( mxTextTools_upper, + "upper(text)\n\n" + "Return text converted to upper case.") +{ + Py_Assert(PySequence_Check(args),PyExc_TypeError,"Arguments are not a tuple?"); + PyObject *text; + + Py_GetArgObject(text); + if (PyString_Check(text)) + return mxTextTools_Upper(text); +#ifdef HAVE_UNICODE + else if (PyUnicode_Check(text)) + return mxTextTools_UnicodeUpper(text); +#endif + else + Py_Error(PyExc_TypeError, + "expected string or unicode"); + + onError: + return NULL; +} + +Py_C_Function( mxTextTools_lower, + "lower(text)\n\n" + "Return text converted to lower case.") +{ + Py_Assert(PySequence_Check(args),PyExc_TypeError,"Arguments are not a tuple?"); + PyObject *text; + + Py_GetArgObject(text); + if (PyString_Check(text)) + return mxTextTools_Lower(text); +#ifdef HAVE_UNICODE + else if (PyUnicode_Check(text)) + return mxTextTools_UnicodeLower(text); +#endif + else + Py_Error(PyExc_TypeError, + "expected string or unicode"); + + onError: + return NULL; +} + +Py_C_Function( mxTextTools_str2hex, + "str2hex(text)\n\n" + "Return text converted to a string consisting of two byte\n" + "HEX values.") +{ + Py_Assert(PySequence_Check(args),PyExc_TypeError,"Arguments are not a tuple?"); + char *str; + Py_ssize_t len; + + Py_Get2Args("s#",str,len); + + return mxTextTools_HexStringFromString(str,len); + + onError: + return NULL; +} + +Py_C_Function( mxTextTools_hex2str, + "hex2str(text)\n\n" + "Return text interpreted as two byte HEX values converted\n" + "to a string.") +{ + Py_Assert(PySequence_Check(args),PyExc_TypeError,"Arguments are not a tuple?"); + char *str; + Py_ssize_t len; + + Py_Get2Args("s#",str,len); + + return 
mxTextTools_StringFromHexString(str,len); + + onError: + return NULL; +} + +Py_C_Function( mxTextTools_isascii, + "isascii(text,start=0,stop=len(text))\n\n" + "Return 1/0 depending on whether text only contains ASCII\n" + "characters." + ) +{ + Py_Assert(PySequence_Check(args),PyExc_TypeError,"Arguments are not a tuple?"); + PyObject *text; + Py_ssize_t start=0, stop = INT_MAX; + int rc; + + Py_GetArgObject(text); + rc = mxTextTools_IsASCII(text, start, stop); + if (rc < 0) + goto onError; + return PyInt_FromLong(rc); + + onError: + return NULL; +} + +/* --- module init --------------------------------------------------------- */ + +/* Python Method Table */ + +static PyMethodDef Module_methods[] = +{ + Py_MethodWithKeywordsListEntry("tag",mxTextTools_tag), + Py_MethodListEntry("join",mxTextTools_join), + Py_MethodListEntry("cmp",mxTextTools_cmp), + Py_MethodListEntry("joinlist",mxTextTools_joinlist), + Py_MethodListEntry("set",mxTextTools_set), + Py_MethodListEntry("setfind",mxTextTools_setfind), + Py_MethodListEntry("setsplit",mxTextTools_setsplit), + Py_MethodListEntry("setsplitx",mxTextTools_setsplitx), + Py_MethodListEntry("setstrip",mxTextTools_setstrip), + Py_MethodWithKeywordsListEntry("TextSearch",mxTextSearch_TextSearch), + Py_MethodListEntry("CharSet",mxCharSet_CharSet), + Py_MethodListEntry("TagTable",mxTagTable_TagTable), +#ifdef HAVE_UNICODE + Py_MethodListEntry("UnicodeTagTable",mxTagTable_UnicodeTagTable), +#endif + Py_MethodListEntrySingleArg("upper",mxTextTools_upper), + Py_MethodListEntrySingleArg("lower",mxTextTools_lower), + Py_MethodListEntry("charsplit",mxTextTools_charsplit), + Py_MethodListEntry("splitat",mxTextTools_splitat), + Py_MethodListEntry("suffix",mxTextTools_suffix), + Py_MethodListEntry("prefix",mxTextTools_prefix), + Py_MethodListEntry("hex2str",mxTextTools_hex2str), + Py_MethodListEntry("str2hex",mxTextTools_str2hex), + Py_MethodListEntrySingleArg("isascii",mxTextTools_isascii), + {NULL,NULL} /* end of list */ +}; + +/* Cleanup function */ +static +void mxTextToolsModule_Cleanup(void) +{ + mxTextTools_TagTables = NULL; + + /* Reset mxTextTools_Initialized flag */ + mxTextTools_Initialized = 0; +} + +MX_EXPORT(void) + initmxTextTools(void) +{ + PyObject *module, *moddict; + + if (mxTextTools_Initialized) + Py_Error(PyExc_SystemError, + "can't initialize "MXTEXTTOOLS_MODULE" more than once"); + + /* Init type objects */ + PyType_Init(mxTextSearch_Type); +#ifdef MXFASTSEARCH + PyType_Init(mxFS_Type); +#endif + PyType_Init(mxCharSet_Type); + PyType_Init(mxTagTable_Type); + + /* create module */ + module = Py_InitModule4(MXTEXTTOOLS_MODULE, /* Module name */ + Module_methods, /* Method list */ + Module_docstring, /* Module doc-string */ + (PyObject *)NULL, /* always pass this as *self */ + PYTHON_API_VERSION); /* API Version */ + if (!module) + goto onError; + + /* Init TagTable cache */ + if ((mxTextTools_TagTables = PyDict_New()) == NULL) + goto onError; + + /* Register cleanup function */ + if (Py_AtExit(mxTextToolsModule_Cleanup)) + /* XXX what to do if we can't register that function ??? 
*/; + + /* Add some symbolic constants to the module */ + moddict = PyModule_GetDict(module); + PyDict_SetItemString(moddict, + "__version__", + PyString_FromString(VERSION)); + + mx_ToUpper = mxTextTools_ToUpper(); + PyDict_SetItemString(moddict, + "to_upper", + mx_ToUpper); + + mx_ToLower = mxTextTools_ToLower(); + PyDict_SetItemString(moddict, + "to_lower", + mx_ToLower); + + /* Let the tag table cache live in the module dictionary; we just + keep a weak reference in mxTextTools_TagTables around. */ + PyDict_SetItemString(moddict, + "tagtable_cache", + mxTextTools_TagTables); + Py_DECREF(mxTextTools_TagTables); + + insint(moddict, "BOYERMOORE", MXTEXTSEARCH_BOYERMOORE); + insint(moddict, "FASTSEARCH", MXTEXTSEARCH_FASTSEARCH); + insint(moddict, "TRIVIAL", MXTEXTSEARCH_TRIVIAL); + + /* Init exceptions */ + if ((mxTextTools_Error = insexc(moddict, + "Error", + PyExc_StandardError)) == NULL) + goto onError; + + /* Type objects */ + Py_INCREF(&mxTextSearch_Type); + PyDict_SetItemString(moddict, "TextSearchType", + (PyObject *)&mxTextSearch_Type); + Py_INCREF(&mxCharSet_Type); + PyDict_SetItemString(moddict, "CharSetType", + (PyObject *)&mxCharSet_Type); + Py_INCREF(&mxTagTable_Type); + PyDict_SetItemString(moddict, "TagTableType", + (PyObject *)&mxTagTable_Type); + + /* Tag Table command symbols (these will be exposed via + simpleparse.stt.TextTools.Constants.TagTables) */ + insint(moddict, "_const_AllIn", MATCH_ALLIN); + insint(moddict, "_const_AllNotIn", MATCH_ALLNOTIN); + insint(moddict, "_const_Is", MATCH_IS); + insint(moddict, "_const_IsIn", MATCH_ISIN); + insint(moddict, "_const_IsNot", MATCH_ISNOTIN); + insint(moddict, "_const_IsNotIn", MATCH_ISNOTIN); + + insint(moddict, "_const_Word", MATCH_WORD); + insint(moddict, "_const_WordStart", MATCH_WORDSTART); + insint(moddict, "_const_WordEnd", MATCH_WORDEND); + + insint(moddict, "_const_AllInSet", MATCH_ALLINSET); + insint(moddict, "_const_IsInSet", MATCH_ISINSET); + insint(moddict, "_const_AllInCharSet", MATCH_ALLINCHARSET); + insint(moddict, "_const_IsInCharSet", MATCH_ISINCHARSET); + + insint(moddict, "_const_Fail", MATCH_FAIL); + insint(moddict, "_const_Jump", MATCH_JUMP); + insint(moddict, "_const_EOF", MATCH_EOF); + insint(moddict, "_const_Skip", MATCH_SKIP); + insint(moddict, "_const_Move", MATCH_MOVE); + + insint(moddict, "_const_JumpTarget", MATCH_JUMPTARGET); + + insint(moddict, "_const_sWordStart", MATCH_SWORDSTART); + insint(moddict, "_const_sWordEnd", MATCH_SWORDEND); + insint(moddict, "_const_sFindWord", MATCH_SFINDWORD); + insint(moddict, "_const_NoWord", MATCH_NOWORD); + + insint(moddict, "_const_Call", MATCH_CALL); + insint(moddict, "_const_CallArg", MATCH_CALLARG); + + insint(moddict, "_const_Table", MATCH_TABLE); + insint(moddict, "_const_SubTable", MATCH_SUBTABLE); + insint(moddict, "_const_TableInList", MATCH_TABLEINLIST); + insint(moddict, "_const_SubTableInList", MATCH_SUBTABLEINLIST); + + insint(moddict, "_const_Loop", MATCH_LOOP); + insint(moddict, "_const_LoopControl", MATCH_LOOPCONTROL); + + /* Tag Table command flags */ + insint(moddict, "_const_CallTag", MATCH_CALLTAG); + insint(moddict, "_const_AppendToTagobj", MATCH_APPENDTAG); + insint(moddict, "_const_AppendTagobj", MATCH_APPENDTAGOBJ); + insint(moddict, "_const_AppendMatch", MATCH_APPENDMATCH); + insint(moddict, "_const_LookAhead", MATCH_LOOKAHEAD); + + /* Tag Table argument integers */ + insint(moddict, "_const_To", MATCH_JUMP_TO); + insint(moddict, "_const_MatchOk", MATCH_JUMP_MATCHOK); + insint(moddict, "_const_MatchFail", MATCH_JUMP_MATCHFAIL); + 
insint(moddict, "_const_ToEOF", MATCH_MOVE_EOF); + insint(moddict, "_const_ToBOF", MATCH_MOVE_BOF); + insint(moddict, "_const_Here", MATCH_FAIL_HERE); + + insint(moddict, "_const_ThisTable", MATCH_THISTABLE); + + insint(moddict, "_const_Break", MATCH_LOOPCONTROL_BREAK); + insint(moddict, "_const_Reset", MATCH_LOOPCONTROL_RESET); + + DPRINTF("sizeof(string_charset)=%i bytes\n", sizeof(string_charset)); +#ifdef HAVE_UNICODE + DPRINTF("sizeof(unicode_charset)=%i bytes\n", sizeof(unicode_charset)); +#endif + + /* We are now initialized */ + mxTextTools_Initialized = 1; + + onError: + /* Check for errors and report them */ + if (PyErr_Occurred()) + Py_ReportModuleInitError(MXTEXTTOOLS_MODULE); + return; +} diff -Nru simpleparse-2.1.0a1/simpleparse/stt/TextTools/mxTextTools/mxTextTools.def simpleparse-2.2.0/simpleparse/stt/TextTools/mxTextTools/mxTextTools.def --- simpleparse-2.1.0a1/simpleparse/stt/TextTools/mxTextTools/mxTextTools.def 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/stt/TextTools/mxTextTools/mxTextTools.def 2012-10-17 20:54:31.000000000 +0000 @@ -0,0 +1,2 @@ +EXPORTS + initmxTextTools diff -Nru simpleparse-2.1.0a1/simpleparse/stt/TextTools/mxTextTools/mxTextTools.h simpleparse-2.2.0/simpleparse/stt/TextTools/mxTextTools/mxTextTools.h --- simpleparse-2.1.0a1/simpleparse/stt/TextTools/mxTextTools/mxTextTools.h 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/stt/TextTools/mxTextTools/mxTextTools.h 2015-11-11 18:42:23.000000000 +0000 @@ -0,0 +1,266 @@ +#ifndef MXTEXTTOOLS_H +#define MXTEXTTOOLS_H +/* + mxTextTools -- Fast text manipulation routines + + Copyright (c) 2000, Marc-Andre Lemburg; mailto:mal@lemburg.com + Copyright (c) 2000-2002, eGenix.com Software GmbH; mailto:info@egenix.com +*/ + +/* The extension's name; must be the same as the init function's suffix */ +#define MXTEXTTOOLS_MODULE "mxTextTools" + +#include "mxbmse.h" +#ifdef MXFASTSEARCH +# include "private/mxfse.h" +#endif + +/* Include generic mx extension header file */ +#include "mxh.h" + +#ifdef MX_BUILDING_MXTEXTTOOLS +# define MXTEXTTOOLS_EXTERNALIZE MX_EXPORT +#else +# define MXTEXTTOOLS_EXTERNALIZE MX_IMPORT +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* --- Text Search Object ---------------------------------------*/ + +/* Algorithm values */ +#define MXTEXTSEARCH_BOYERMOORE 0 +#define MXTEXTSEARCH_FASTSEARCH 1 +#define MXTEXTSEARCH_TRIVIAL 2 + +typedef struct { + PyObject_HEAD + PyObject *match; /* Match string object */ + PyObject *translate; /* Translate string object or NULL */ + int algorithm; /* Algorithm to be used */ + void *data; /* Internal data used by the algorithm or NULL */ +} mxTextSearchObject; + +MXTEXTTOOLS_EXTERNALIZE(PyTypeObject) mxTextSearch_Type; + +#define mxTextSearch_Check(v) \ + (Py_TYPE((v)) == &mxTextSearch_Type) + +/* Exporting these APIs for mxTextTools internal use only ! 
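   At the Python level the corresponding functionality is reached through the
   TextSearch() constructor exported by the module; a minimal, illustrative
   sketch (values shown are not verified output):

       from simpleparse.stt.TextTools.TextTools import TextSearch

       so = TextSearch("needle")            # typically Boyer-Moore for 8-bit text
       so.find("haystack with a needle")    # -> 16
       so.findall("needle needle")          # -> [(0, 6), (7, 13)]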
*/ + +extern +Py_ssize_t mxTextSearch_MatchLength(PyObject *self); + +extern +Py_ssize_t mxTextSearch_SearchBuffer(PyObject *self, + char *text, + Py_ssize_t start, + Py_ssize_t stop, + Py_ssize_t *sliceleft, + Py_ssize_t *sliceright); + +#ifdef HAVE_UNICODE +extern +Py_ssize_t mxTextSearch_SearchUnicode(PyObject *self, + Py_UNICODE *text, + Py_ssize_t start, + Py_ssize_t stop, + Py_ssize_t *sliceleft, + Py_ssize_t *sliceright); +#endif + +/* --- Character Set Object -------------------------------------*/ + +/* Mode values */ +#define MXCHARSET_8BITMODE 0 +#define MXCHARSET_UCS2MODE 1 +#define MXCHARSET_UCS4MODE 2 + +typedef struct { + PyObject_HEAD + PyObject *definition; /* Character set definition */ + int mode; /* Operation mode: + 0 - 8-bit character lookup + 1 - UCS-2 Unicode lookup + 2 - UCS-4 Unicode lookup + */ + void *lookup; /* Lookup table */ +} mxCharSetObject; + +MXTEXTTOOLS_EXTERNALIZE(PyTypeObject) mxCharSet_Type; + +#define mxCharSet_Check(v) \ + (Py_TYPE((v)) == &mxCharSet_Type) + + +/* Exporting these APIs for mxTextTools internal use only ! */ + +extern +int mxCharSet_ContainsChar(PyObject *self, + register unsigned char ch); + +#ifdef HAVE_UNICODE +extern +int mxCharSet_ContainsUnicodeChar(PyObject *self, + register Py_UNICODE ch); +#endif + +extern +Py_ssize_t mxCharSet_Match(PyObject *self, + PyObject *text, + Py_ssize_t start, + Py_ssize_t stop, + int direction); + +/* --- Tag Table Object -----------------------------------------*/ + +typedef struct { + PyObject *tagobj; /* Tag object to assign, call, + append, etc. or NULL */ + int cmd; /* Command integer */ + int flags; /* Command flags */ + PyObject *args; /* Command arguments */ + int jne; /* Non-match jump offset */ + int je; /* Match jump offset */ +} mxTagTableEntry; + +#define MXTAGTABLE_STRINGTYPE 0 +#define MXTAGTABLE_UNICODETYPE 1 + +typedef struct { + PyObject_VAR_HEAD + PyObject *definition; /* Reference to the original + table definition or NULL; + needed for caching */ + int tabletype; /* Type of compiled table: + 0 - 8-bit string args + 1 - Unicode args */ + int numentries; /* number of allocated entries */ + mxTagTableEntry entry[1]; /* Variable length array of + mxTagTableEntry fields */ +} mxTagTableObject; + +MXTEXTTOOLS_EXTERNALIZE(PyTypeObject) mxTagTable_Type; + +#define mxTagTable_Check(v) \ + (Py_TYPE((v)) == &mxTagTable_Type) + +#define mxTagTable_Type(v) \ + (((mxTagTableObject *)(v))->tabletype) +#define mxTagTable_Definition(v) \ + (((mxTagTableObject *)(v))->definition) + +/* Exporting these APIs for mxTextTools internal use only ! */ +extern +PyObject *mxTagTable_New(PyObject *definition, + int tabletype, + int cacheable); + +/* --- Tagging Engine -------------------------------------------*/ + +/* Exporting these APIs for mxTextTools internal use only ! */ + +/* mxTextTools_TaggingEngine(): a table driven parser engine + + - return codes: rc = 2: match ok; rc = 1: match failed; rc = 0: error + - doesn't check type of passed arguments ! + - doesn't increment reference counts of passed objects ! 
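   From Python, tables are usually pre-compiled once with TagTable() (or
   UnicodeTagTable()) and passed to tag() repeatedly; CharSet objects play the
   same role for the CharSet-based commands. A minimal, illustrative sketch
   (not a verified transcript):

       from simpleparse.stt.TextTools.TextTools import TagTable, CharSet, tag, AllInCharSet

       letters = CharSet("a-zA-Z")
       table = TagTable((("word", AllInCharSet, letters),))
       tag("hello world", table)   # -> (1, [('word', 0, 5, None)], 5)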
+*/ + +extern +int mxTextTools_TaggingEngine(PyObject *textobj, + Py_ssize_t text_start, + Py_ssize_t text_stop, + mxTagTableObject *table, + PyObject *taglist, + PyObject *context, + Py_ssize_t *next); + +extern +int mxTextTools_UnicodeTaggingEngine(PyObject *textobj, + Py_ssize_t text_start, + Py_ssize_t text_stop, + mxTagTableObject *table, + PyObject *taglist, + PyObject *context, + Py_ssize_t *next); + +/* Command integers for cmd; see Constants/TagTable.py for details */ + +/* Low-level string matching, using the same simple logic: + - match has to be a string + - they only modify x (the current position in text) +*/ +#define MATCH_ALLIN 11 +#define MATCH_ALLNOTIN 12 +#define MATCH_IS 13 +#define MATCH_ISIN 14 +#define MATCH_ISNOTIN 15 + +#define MATCH_WORD 21 +#define MATCH_WORDSTART 22 +#define MATCH_WORDEND 23 + +#define MATCH_ALLINSET 31 +#define MATCH_ISINSET 32 + +#define MATCH_ALLINCHARSET 41 +#define MATCH_ISINCHARSET 42 + +#define MATCH_MAX_LOWLEVEL 99 + +/* Jumps and other low-level special commands */ + +#define MATCH_FAIL 100 +#define MATCH_JUMP MATCH_FAIL + +#define MATCH_EOF 101 +#define MATCH_SKIP 102 +#define MATCH_MOVE 103 + +#define MATCH_JUMPTARGET 104 + +#define MATCH_MAX_SPECIALS 199 + +/* Higher-level string matching */ + +#define MATCH_SWORDSTART 211 +#define MATCH_SWORDEND 212 +#define MATCH_SFINDWORD 213 +#define MATCH_NOWORD MATCH_SWORDSTART + +/* Higher-level special commands */ +#define MATCH_CALL 201 +#define MATCH_CALLARG 202 +#define MATCH_TABLE 203 +#define MATCH_SUBTABLE 207 +#define MATCH_TABLEINLIST 204 +#define MATCH_SUBTABLEINLIST 208 +#define MATCH_LOOP 205 +#define MATCH_LOOPCONTROL 206 + +/* Special argument integers */ +#define MATCH_JUMP_TO 0 +#define MATCH_JUMP_MATCHOK 1000000 +#define MATCH_JUMP_MATCHFAIL -1000000 +#define MATCH_MOVE_EOF -1 +#define MATCH_MOVE_BOF 0 +#define MATCH_FAIL_HERE 1 +#define MATCH_THISTABLE 999 +#define MATCH_LOOPCONTROL_BREAK 0 +#define MATCH_LOOPCONTROL_RESET -1 + +/* Flags set in cmd (>=256) */ +#define MATCH_CALLTAG (1 << 8) +#define MATCH_APPENDTAG (1 << 9) +#define MATCH_APPENDTAGOBJ (1 << 10) +#define MATCH_APPENDMATCH (1 << 11) +#define MATCH_LOOKAHEAD (1 << 12) + +/* EOF */ +#ifdef __cplusplus +} +#endif +#endif diff -Nru simpleparse-2.1.0a1/simpleparse/stt/TextTools/mxTextTools/recursecommands.h simpleparse-2.2.0/simpleparse/stt/TextTools/mxTextTools/recursecommands.h --- simpleparse-2.1.0a1/simpleparse/stt/TextTools/mxTextTools/recursecommands.h 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/stt/TextTools/mxTextTools/recursecommands.h 2015-11-11 18:42:23.000000000 +0000 @@ -0,0 +1,119 @@ +/* recursive tag-table commands */ + +case MATCH_TABLE: +case MATCH_SUBTABLE: +case MATCH_TABLEINLIST: +case MATCH_SUBTABLEINLIST: + { + PyObject * newTable = NULL; + + if (childReturnCode == NULL_CODE ) { + /* haven't yet parsed the sub-table match */ + switch (command) { + /* determine the table to which we will transfer control */ + case MATCH_TABLE: + case MATCH_SUBTABLE: + { + /* switch to either current tag table or a compiled sub-table */ + if (PyInt_Check(match) && + PyInt_AS_LONG(match) == MATCH_THISTABLE) { + newTable = (PyObject *)table; + } else { + newTable = match; + } + + /* XXX Fix to auto-compile that match argument + + Should also test that it _is_ a compiled TagTable, + rather than that it _isn't_ a tuple? 
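      For reference, the Python-level behaviour being implemented here, as a
      minimal sketch (tables and values are illustrative):

          from simpleparse.stt.TextTools.TextTools import tag, Table, AllIn, IsIn

          digits = (("digits", AllIn, "0123456789"),)
          expr = (
              ("lhs", Table, digits),
              (None,  IsIn,  "+-"),
              ("rhs", Table, digits),
          )
          tag("12+34", expr)
          # -> (1, [('lhs', 0, 2, [('digits', 0, 2, None)]),
          #         ('rhs', 3, 5, [('digits', 3, 5, None)])], 5)

      Using SubTable instead of Table reports the child's matches into the
      parent's taglist rather than into a nested sub-list.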
+ */ + if (!mxTagTable_Check(newTable)) { + childReturnCode = ERROR_CODE; + errorType = PyExc_TypeError; + errorMessage = PyString_FromFormat( + "Match argument must be compiled TagTable: was a %.50s", + Py_TYPE(newTable)->tp_name + ); + } else { + /* we decref in POP */ + Py_INCREF(newTable); + } + break; + } + case MATCH_TABLEINLIST: + case MATCH_SUBTABLEINLIST: + { + /* switch to explicitly specified table in a list (compiling if necessary) */ + + + newTable = PyList_GetItem( + PyTuple_GET_ITEM(match, 0), + PyInt_AS_LONG( + PyTuple_GET_ITEM(match, 1) + ) + ); + if (newTable == NULL) { + childReturnCode = ERROR_CODE; + errorType = PyExc_TypeError; + errorMessage = PyString_FromFormat( + "Tag table entry %d: Could not find target table in list of tables", + (unsigned int)index + ); + } else { + if (mxTagTable_Check(newTable)) { + /* This is decref'd in POP */ + Py_INCREF(newTable); + } else { + /* These tables are considered to be + cacheable. */ + newTable = mxTagTable_New(newTable, + table->tabletype, + 1); + /* why didn't we increment the refcount here? does New give us a new ref? */ + if (newTable == NULL) { + childReturnCode = ERROR_CODE; + errorType = PyExc_TypeError; + errorMessage = PyString_FromFormat( + "Tag table entry %d: Could not compile target table", + (unsigned int)index + ); + } + } + } + break; + } + + } + + if (childReturnCode == NULL_CODE) { + /* we found a valid newTable */ + PyObject *subtags = NULL; + + if (taglist != Py_None && command != MATCH_SUBTABLE && command != MATCH_SUBTABLEINLIST) { + /* Create a new list for use as subtaglist + + Will be decref'd by the child-finished clause if necessary + */ + subtags = PyList_New(0); + if (subtags == NULL) { + childReturnCode = ERROR_CODE; + errorType = PyExc_MemoryError; + } + } else { + /* Use taglist as subtaglist + + We don't incref it as we check explicitly for whether + it's the same when we go to decref (during childReturnCode + handling) + */ + subtags = taglist; + } + + /* match other table */ + PUSH_STACK( newTable, subtags ); + RESET_TABLE_VARIABLES + } + } + break; + } + diff -Nru simpleparse-2.1.0a1/simpleparse/stt/TextTools/mxTextTools/speccommands.h simpleparse-2.2.0/simpleparse/stt/TextTools/mxTextTools/speccommands.h --- simpleparse-2.1.0a1/simpleparse/stt/TextTools/mxTextTools/speccommands.h 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/stt/TextTools/mxTextTools/speccommands.h 2012-10-17 20:54:31.000000000 +0000 @@ -0,0 +1,101 @@ +/* "Special" commands code fragment + + The contract here is: + + The commands may alter any of the tag-specific variables + + errors may be indicated if encountered in childReturnCode and the error* variables + + setting childReturnCode equal to FAILURE_CODE declares that the read head has not moved + + childReturnCode must be set (or default "you have to move forward to match" semantics are used) + +*/ +/* doesn't there need to be a check for integer arguments + that the value is an integer? + Or does the compiler do that now */ + + + + case MATCH_FAIL: /* == MATCH_JUMP */ + /* dumb question, what is MATCH_JUMP supposed to do? */ + childReturnCode = FAILURE_CODE; + break; + + case MATCH_SKIP: + /* Argh, what to do when moves past buffer? + + Where do we check that this is still in-bounds? + documented as always succeeding, but results in + result-tuples with negative or out-of-range values + in current code. 
+ Can't do: + if (childPosition < sliceleft) { + childPosition = 0; + } else if (childPosition > sliceright) { + childPosition = sliceright; + } + because we might have another move, or an EOF + or whatever coming up. + + Marc-André want's these conditions: + childPosition < 0 { # (not sliceleft!) + raise TypeError: Tag Table entry %(index): moved/skipped beyond start of text + } and no check for > right or beyond end of buffer... + */ + DPRINTF("\nSkip %li characters\n" + " in string = '%.40s'\n", + PyInt_AS_LONG(match),text+childPosition); + childPosition += PyInt_AS_LONG(match); + childReturnCode = SUCCESS_CODE; + break; + + case MATCH_MOVE: + /* same potential out-of-bounds issue as with skip */ + childPosition = PyInt_AS_LONG(match); + if (childPosition < 0) { + /* Relative to end of the slice */ + childPosition += sliceright + 1; + } else { + /* Relative to beginning of the slice */ + childPosition += sliceleft; + } + DPRINTF("\nMove to position %i \n" + " string = '%.40s'\n", + childPosition,text+childPosition); + childReturnCode = SUCCESS_CODE; + break; + + case MATCH_EOF: + DPRINTF("\nEOF at position %i ? \n" + " string = '%.40s'\n", + childPosition,text+childPosition); + + if (sliceright > childPosition) { /* not matched */ + childReturnCode = FAILURE_CODE; + } else { + /* I don't see why this would necessarily be the end of the parsing run, after all + you might want to match EOF, then back up X characters? The documentation doesn't + mention anything about such a restriction. + + Approach here seems to match documentation functionality + but still suffers the out-of-range problems seen in move + and skip commands as well. + */ + childReturnCode = SUCCESS_CODE; + childPosition = sliceright; + childStart = sliceright; + } + break; + + + case MATCH_JUMPTARGET: + /* note: currently this can report a value, though I don't think + that was intended originally. I see it as useful because it lets + you enter a flag in the results table just by specifying a non-None + tagobj */ + /* null operation */ + DPRINTF("\nJumpTarget '%.40s' (skipped)\n", + PyString_AsString(match)); + childReturnCode = SUCCESS_CODE; + break; diff -Nru simpleparse-2.1.0a1/simpleparse/stt/TextTools/README simpleparse-2.2.0/simpleparse/stt/TextTools/README --- simpleparse-2.1.0a1/simpleparse/stt/TextTools/README 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/stt/TextTools/README 2012-10-17 20:54:31.000000000 +0000 @@ -0,0 +1,4 @@ +Please see the documentation in the Doc/ subdirectory for +further information on installation and usage. + +Marc-Andre Lemburg, mal@lemburg.com diff -Nru simpleparse-2.1.0a1/simpleparse/stt/TextTools/TextTools.py simpleparse-2.2.0/simpleparse/stt/TextTools/TextTools.py --- simpleparse-2.1.0a1/simpleparse/stt/TextTools/TextTools.py 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/stt/TextTools/TextTools.py 2015-11-11 18:42:23.000000000 +0000 @@ -0,0 +1,807 @@ +""" mxTextTools - A tools package for fast text processing. + + Copyright (c) 2000, Marc-Andre Lemburg; mailto:mal@lemburg.com + Copyright (c) 2000-2003, eGenix.com Software GmbH; mailto:info@egenix.com + See the documentation for further information on copyrights, + or contact the author. All Rights Reserved. 
+""" +from __future__ import print_function + +import types + +# +# import the C module and the version number +# +from .mxTextTools import * +from .mxTextTools import __version__ + +# +# import the symbols needed to write tag tables +# +from .Constants.TagTables import * + +# +# import the some handy character sets +# +from .Constants.Sets import * + +# +# format and print tables, taglists and joinlists: +# +def format_entry(table,i, + + TupleType=tuple): + + """ Returns a pp-formatted tag table entry as string + """ + e = table[i] + jne = 0 + je = 1 + t,c,m = e[:3] + if len(e)>3: jne = e[3] + if len(e)>4: je = e[4] + flags,cmd = divmod(c,256) + c = id2cmd[cmd] + if type(m) == TupleType and c in ('Table','SubTable'): + m = '' + elif m == None: + m = 'Here/To' + else: + m = repr(m) + if len(m) > 17: + m = m[:17]+'...' + return '%-15.15s : %-30s : jne=%+i : je=%+i' % \ + (repr(t),'%-.15s : %s'%(c,m),jne,je) + +def format_table(table,i=-1): + + """ Returns a pp-formatted version of the tag table as string """ + + l = [] + for j in range(len(table)): + if i == j: + l.append('--> '+format_entry(table,j)) + else: + l.append(' '+format_entry(table,j)) + return '\n'.join(l)+'\n' + +def print_tagtable(table): + + """ Print the tag table + """ + print(format_table(table)) + +def print_tags(text,tags,indent=0): + + """ Print the taglist tags for text using the given indent level + """ + for tag,l,r,subtags in tags: + tagname = repr(tag) + if len(tagname) > 20: + tagname = tagname[:20] + '...' + target = repr(text[l:r]) + if len(target) > 60: + target = target[:60] + '...' + if subtags == None: + print(' '+indent*' |',tagname,': ',target,(l,r)) + else: + print(' '+indent*' |',tagname,': ',target,(l,r)) + print_tags(text,subtags,indent+1) + +def print_joinlist(joins,indent=0, + + StringType=str): + + """ Print the joinlist joins using the given indent level + """ + for j in joins: + if type(j) == StringType: + text = repr(j) + if len(text) > 40: + text = text[:40] + '...' + print(' '+indent*' |',text,' (len = %i)' % len(j)) + else: + text = j[0] + l,r = j[1:3] + text = repr(text[l:r]) + if len(text) > 40: + text = text[:40] + '...' + print(' '+indent*' |',text,' (len = %i)' % (r-l),(l,r)) + +def normlist(jlist, + + StringType=str): + + """ Return a normalized joinlist. + + All tuples in the joinlist are turned into real strings. The + resulting list is a equivalent copy of the joinlist only + consisting of strings. 
+ + """ + l = [''] * len(jlist) + for i in range(len(jlist)): + entry = jlist[i] + if type(entry) == StringType: + l[i] = entry + else: + l[i] = entry[0][entry[1]:entry[2]] + return l + +# +# aid for matching from a list of words +# +def _lookup_dict(l,index=0): + + d = {} + for w in l: + c = w[index] + if c in d: + d[c].append(w) + else: + d[c] = [w] + return d + +def word_in_list(l): + + """ Creates a lookup table that matches the words in l + """ + t = [] + d = _lookup_dict(l) + keys = list(d.keys()) + if len(keys) < 18: # somewhat arbitrary bound + # fast hint for small sets + t.append((None,IsIn,''.join(list(d.keys())))) + t.append((None,Skip,-1)) + # test groups + for c, group in list(d.items()): + t.append(None) # hint will be filled in later + i = len(t)-1 + for w in group: + t.append((None,Word,w[1:],+1,MatchOk)) + t.append((None,Fail,Here)) + # add hint + t[i] = (None,Is,c,len(t)-i) + t.append((None,Fail,Here)) + return tuple(t) + +# +# Extra stuff useful in combination with the C functions +# + +def replace(text,what,with_what,start=0,stop=None, + + SearchObject=TextSearch,join=join,joinlist=joinlist,tag=tag, + string_replace=str.replace,type=type, + StringType=str): + + """A fast replacement for string.replace. + + what can be given as string or search object. + + This function is a good example for the AppendTagobj-flag usage + (the taglist can be used directly as joinlist). + + """ + if type(what) is not TextSearchType: + so = SearchObject(what) + else: + so = what + what = so.match + if stop is None: + if start == 0 and len(what) < 2: + return string_replace(text,what,with_what) + stop = len(text) + t = ((text,sWordStart,so,+2), + # Found something, replace and continue searching + (with_what,Skip+AppendTagobj,len(what),-1,-1), + # Rest of text + (text,Move,ToEOF) + ) + found,taglist,last = tag(text,t,start,stop) + if not found: + return text + return join(taglist) + +# Alternative (usually slower) versions using different techniques: + +def _replace2(text,what,with_what,start=0,stop=None, + + join=join,joinlist=joinlist,tag=tag, + TextSearchType=TextSearchType,TextSearch=TextSearch): + + """Analogon to string.replace; returns a string with_what all occurences + of what in text[start:stop] replaced by with_what. + + This version uses a one entry tag-table and a + Boyer-Moore-Search-object. what can be a string or a + TextSearch search object. It's faster than string.replace in + those cases, where the what-string gets long and/or many + replacements are found; faster meaning from a few percent up to + many times as fast + + start and stop define the slice of text to work in. stop + defaults to len(text). 
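    The public replace() defined above shares this interface; a short,
    illustrative sketch:

        replace("spam and spam", "spam", "eggs")   # -> 'eggs and eggs'
        so = TextSearch("spam")                    # reusable search object
        replace("spam and spam", so, "eggs")       # same result, no re-build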
+ + """ + if stop is None: + stop = len(text) + if type(what) is not TextSearchType: + what=TextSearch(what) + t = ((with_what,sFindWord,what,+1,+0),) + found,taglist,last = tag(text,t,start,stop) + if not found: + return text + return join(joinlist(text,taglist)) + +def _replace3(text,what,with_what, + + join=str.join,TextSearch=TextSearch, + TextSearchType=TextSearchType): + + if type(what) is not TextSearchType: + what=TextSearch(what) + slices = what.findall(text) + if not slices: + return text + l = [] + x = 0 + for left,right in slices: + l.append(text[x:left] + with_what) + x = right + l.append(text[x:]) + return join(l,'') + +def _replace4(text,what,with_what, + + join=join,joinlist=joinlist,tag=tag,TextSearch=TextSearch, + TextSearchType=TextSearchType): + + if type(what) is not TextSearchType: + what=TextSearch(what) + slices = what.findall(text) + if not slices: + return text + repl = [None]*len(slices) + for i in range(len(slices)): + repl[i] = (with_what,)+slices[i] + return join(joinlist(text,repl)) + +def multireplace(text,replacements,start=0,stop=None, + + join=join,joinlist=joinlist): + + """ Apply multiple replacement to a text at once. + + replacements must be list of tuples (replacement, left, + right). It is used to replace the slice text[left:right] with_what + the string replacement. + + Note that the replacements do not affect one another. Indices + always refer to the original text string. + + Replacements must not overlap. Otherwise a ValueError is + raised. + + """ + if stop is not None: + return join(joinlist(text,replacements,start,stop)) + else: + return join(joinlist(text,replacements,start)) + +def find(text,what,start=0,stop=None, + + SearchObject=TextSearch): + + """ A faster replacement for string.find(). + + Uses a search object for the task. Returns the position of the + first occurance of what in text[start:stop]. stop defaults to + len(text). Returns -1 in case no occurance was found. + + """ + if stop is not None: + return SearchObject(what).find(text,start,stop) + else: + return SearchObject(what).find(text,start) + +def findall(text,what,start=0,stop=None, + + SearchObject=TextSearch): + + """ Find all occurances of what in text. + + Uses a search object for the task. Returns a list of slice + tuples (l,r) marking the all occurances in + text[start:stop]. stop defaults to len(text). Returns an + empty list in case no occurance was found. + + """ + if stop is not None: + return SearchObject(what).findall(text,start,stop) + else: + return SearchObject(what).findall(text,start) + +def split(text,sep,start=0,stop=None,translate=None, + + SearchObject=TextSearch): + + """ A faster replacement for string.split(). + + Uses a search object for the task. Returns the result of + cutting the text[start:stop] string into snippets at every sep + occurance in form of a list of substrings. translate is passed + to the search object as translation string if given. + + XXX Undocumented. 
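    A short illustrative sketch of the search-object based helpers defined
    above (values follow the docstrings, not verified output):

        find("one two one", "one")       # -> 0
        findall("one two one", "one")    # -> [(0, 3), (8, 11)]
        split("one,two,,three", ",")     # -> ['one', 'two', '', 'three']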
+ + """ + if translate: + so = SearchObject(sep,translate) + else: + so = SearchObject(sep) + if stop: + cuts = so.findall(text,start,stop) + else: + cuts = so.findall(text,start) + l = 0 + list = [] + append = list.append + for left,right in cuts: + append(text[l:left]) + l = right + append(text[l:]) + return list + +# helper for tagdict +def _tagdict(text,dict,prefix,taglist): + + for o,l,r,s in taglist: + pfx = prefix + str(o) + dict[pfx] = text[l:r] + if s: + _tagdict(text,dict,pfx+'.',s) + +def tagdict(text,*args): + + """ Tag a text just like the function tag() and then convert + its output into a dictionary where the tagobjects reference + their respective strings + + This function emulates the interface of tag(). In contrast to + tag() this funtion *does* make copies of the found stings, + though. + + Returns a tuple (rc,tagdict,next) with_what the same meaning of rc + and next as tag(); tagdict is the new dictionary or None in + case rc is 0. + + """ + rc,taglist,next = tag(*(text,)+args) + if not rc: + return (rc,None,next) + d = {} + tagdict = _tagdict + for o,l,r,s in taglist: + pfx = str(o) + d[pfx] = text[l:r] + if s: + tagdict(text,d,pfx+'.',s) + return (rc,d,next) + +def invset(chars): + + """ Return a set with_what all characters *except* the ones in chars. + """ + return set(chars,0) + +def is_whitespace(text,start=0,stop=None, + + charset=nonwhitespace_charset): + + """ Return 1 iff text[start:stop] only contains whitespace + characters (as defined in Constants/Sets.py), 0 otherwise. + + """ + if stop is None: + stop = len(text) + return (charset.search(text, 1, start, stop) is None) + +def collapse(text,separator=' ', + + join=join,charset=CharSet(newline+whitespace)): + + """ Eliminates newline characters and compresses whitespace + characters into one space. + + The result is a one line text string. Tim Peters will like + this function called with_what '-' separator ;-) + + """ + return join(charset.split(text), separator) + +_linesplit_table = ( + (None,Is,'\r',+1), + (None,Is,'\n',+1), + ('line',AllInCharSet+AppendMatch,CharSet('^\r\n'),+1,-2), + (None,EOF,Here,+1,MatchOk), + ('empty line',Skip+AppendMatch,0,0,-4), + ) + +def splitlines(text, + + tag=tag,linesplit_table=_linesplit_table): + + """ Split text into a list of single lines. + + The following combinations are considered to be line-ends: + '\r', '\r\n', '\n'; they may be used in any combination. The + line-end indicators are removed from the strings prior to + adding them to the list. + + This function allows dealing with text files from Macs, PCs + and Unix origins in a portable way. + + """ + return tag(text, linesplit_table)[1] + +# Alias for backward compatibility +linesplit = splitlines + +_linecount_table = ( + (None,Is,'\r',+1), + (None,Is,'\n',+1), + ('line',AllInCharSet,CharSet('^\r\n'),+1,-2), + (None,EOF,Here,+1,MatchOk), + ('empty line',Skip,0,0,-4), + ) + +def countlines(text, + + linecount_table=_linecount_table): + + """ Returns the number of lines in text. + + Line ends are treated just like for splitlines() in a + portable way. + + """ + return len(tag(text, linecount_table)[1]) + +_wordsplit_table = ( + (None,AllInCharSet,whitespace_charset,+1), + ('word',AllInCharSet+AppendMatch,nonwhitespace_charset,+1,-1), + (None,EOF,Here,+1,MatchOk), + ) + +def splitwords(text, + + charset=whitespace_charset): + + """ Split text into a list of single words. + + Words are separated by whitespace. The whitespace is stripped + before adding the words to the list. 
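    Example (illustrative):

        splitwords("  tag  tables  ")       # -> ['tag', 'tables']
        splitlines("mac\rdos\r\nunix\n")    # -> ['mac', 'dos', 'unix']
        countlines("mac\rdos\r\nunix\n")    # -> 3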
+ + """ + return charset.split(text) + +# +# Testing and benchmarking +# + +# Taken from my hack.py module: +import time +class _timer: + + """ timer class with a quite obvious interface + - .start() starts a fairly accurate CPU-time timer plus an + absolute timer + - .stop() stops the timer and returns a tuple: the CPU-time in seconds + and the absolute time elapsed since .start() was called + """ + + utime = 0 + atime = 0 + + def start(self, + + clock=time.clock,time=time.time): + + self.atime = time() + self.utime = clock() + + def stop(self, + + clock=time.clock,time=time.time): + + self.utime = clock() - self.utime + self.atime = time() - self.atime + return self.utime,self.atime + + def usertime(self, + + clock=time.clock,time=time.time): + + self.utime = clock() - self.utime + self.atime = time() - self.atime + return self.utime + + def abstime(self, + + clock=time.clock,time=time.time): + + self.utime = clock() - self.utime + self.atime = time() - self.atime + return self.utime + + def __str__(self): + + return '%0.2fu %0.2fa sec.' % (self.utime,self.atime) + +def _bench(file='mxTextTools/mxTextTools.c'): + + def mismatch(orig,new): + print() + for i in range(len(orig)): + if orig[i] != new[i]: + break + else: + print('Length mismatch: orig=%i new=%i' % (len(orig),len(new))) + if len(orig) > len(new): + print('Missing chars:'+repr(orig[len(new):])) + else: + print('Excess chars:'+repr(new[len(orig):])) + print() + return + print('Mismatch at offset %i:' % i) + print((orig[i-100:i] + + '<- %s != %s ->' % (repr(orig[i]),repr(new[i])) + + orig[i+1:i+100])) + print() + + text = open(file).read() + import string + + t = _timer() + print('Working on a %i byte string' % len(text)) + + if 0: + print() + print('Replacing strings') + print('-'*72) + print() + for what,with_what in (('m','M'),('mx','MX'),('mxText','MXTEXT'), + ('hmm','HMM'),('hmmm','HMM'),('hmhmm','HMM')): + print('Replace "%s" with "%s"' % (what,with_what)) + t.start() + for i in range(100): + rtext = text.replace(what,with_what) + print('with string.replace:',t.stop(),'sec.') + t.start() + for i in range(100): + ttext = replace(text,what,with_what) + print('with tag.replace:',t.stop(),'sec.') + if ttext != rtext: + print('results are NOT ok !') + print('-'*72) + mismatch(rtext,ttext) + t.start() + for i in range(100): + ttext = _replace2(text,what,with_what) + print('with tag._replace2:',t.stop(),'sec.') + if ttext != rtext: + print('results are NOT ok !') + print('-'*72) + print(rtext) + t.start() + for i in range(100): + ttext = _replace3(text,what,with_what) + print('with tag._replace3:',t.stop(),'sec.') + if ttext != rtext: + print('results are NOT ok !') + print('-'*72) + print(rtext) + t.start() + for i in range(100): + ttext = _replace4(text,what,with_what) + print('with tag._replace4:',t.stop(),'sec.') + if ttext != rtext: + print('results are NOT ok !') + print('-'*72) + print(rtext) + print() + + if 0: + print() + print('String lower/upper') + print('-'*72) + print() + + op = str.lower + t.start() + for i in range(1000): + op(text) + t.stop() + print(' string.lower:',t) + + op = str.upper + t.start() + for i in range(1000): + op(text) + t.stop() + print(' string.upper:',t) + + op = upper + t.start() + for i in range(1000): + op(text) + t.stop() + print(' TextTools.upper:',t) + + op = lower + t.start() + for i in range(1000): + op(text) + t.stop() + print(' TextTools.lower:',t) + + print('Testing...', end=' ') + ltext = text.lower() + assert ltext == lower(text) + utext = text.upper() + assert utext == upper(text) 
+ print('ok.') + + if 0: + print() + print('Joining lists') + print('-'*72) + print() + + l = setsplit(text,whitespace_set) + + op = str.join + t.start() + for i in range(1000): + op(l) + t.stop() + print(' string.join:',t) + + op = join + t.start() + for i in range(1000): + op(l) + t.stop() + print(' TextTools.join:',t) + + op = str.join + t.start() + for i in range(1000): + op(l,' ') + t.stop() + print(' string.join with separator:',t) + + op = join + t.start() + for i in range(1000): + op(l,' ') + t.stop() + print(' TextTools.join with separator:',t) + + if 0: + print() + print('Creating join lists') + print('-'*72) + print() + + repl = [] + for i in range(0,len(text),10): + repl.append((str(i),i,i+1)) + + op = joinlist + t.start() + for i in range(1000): + op(text,repl) + t.stop() + print(' TextTools.joinlist:',t) + + if 0: + print() + print('Splitting text') + print('-'*72) + print() + + op = str.split + t.start() + for i in range(100): + op(text) + t.stop() + print(' string.split whitespace:',t,'(',len(op(text)),'snippets )') + + op = setsplit + ws = whitespace_set + t.start() + for i in range(100): + op(text,ws) + t.stop() + print(' TextTools.setsplit whitespace:',t,'(',len(op(text,ws)),'snippets )') + + assert text.split() == setsplit(text,ws) + + op = str.split + sep = 'a' + t.start() + for i in range(100): + op(text,sep) + t.stop() + print(' string.split at "a":',t,'(',len(op(text,sep)),'snippets )') + + op = split + sep = 'a' + t.start() + for i in range(100): + op(text,sep) + t.stop() + print(' TextTools.split at "a":',t,'(',len(op(text,sep)),'snippets )') + + op = charsplit + sep = 'a' + t.start() + for i in range(100): + op(text,sep) + t.stop() + print(' TextTools.charsplit at "a":',t,'(',len(op(text,sep)),'snippets )') + + op = setsplit + sep = set('a') + t.start() + for i in range(100): + op(text,sep) + t.stop() + print(' TextTools.setsplit at "a":',t,'(',len(op(text,sep)),'snippets )') + + # Note: string.split and setsplit don't work identically ! + + op = str.split + sep = 'int' + t.start() + for i in range(100): + op(text,sep) + t.stop() + print(' string.split at "int":',t,'(',len(op(text,sep)),'snippets )') + + op = split + sep = 'int' + t.start() + for i in range(100): + op(text,sep) + t.stop() + print(' TextTools.split at "int":',t,'(',len(op(text,sep)),'snippets )') + + op = setsplit + sep = set('int') + t.start() + for i in range(100): + op(text,sep) + t.stop() + print(' TextTools.setsplit at "i", "n", "t":',t,'(',len(op(text,sep)),'snippets )') + + op = str.split + sep = 'register' + t.start() + for i in range(100): + op(text,sep) + t.stop() + print(' string.split at "register":',t,'(',len(op(text,sep)),'snippets )') + + op = split + sep = 'register' + t.start() + for i in range(100): + op(text,sep) + t.stop() + print(' TextTools.split at "register":',t,'(',len(op(text,sep)),'snippets )') + +if __name__=='__main__': + _bench() diff -Nru simpleparse-2.1.0a1/simpleparse/xmlparser/__init__.py simpleparse-2.2.0/simpleparse/xmlparser/__init__.py --- simpleparse-2.1.0a1/simpleparse/xmlparser/__init__.py 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/xmlparser/__init__.py 2015-11-11 18:42:23.000000000 +0000 @@ -0,0 +1,11 @@ +"""XML Parsing package + +At the moment it's really limited, +but it does the basics, and the rest +is mostly just a matter of fiddling +about with Unicode and CharacterType +support. There is only very minimal +support for Reference types, basically +we note that a Reference exists, but +don't do any further processing of it. 
+""" \ No newline at end of file diff -Nru simpleparse-2.1.0a1/simpleparse/xmlparser/xml_parser.py simpleparse-2.2.0/simpleparse/xmlparser/xml_parser.py --- simpleparse-2.1.0a1/simpleparse/xmlparser/xml_parser.py 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/simpleparse/xmlparser/xml_parser.py 2015-11-11 18:42:23.000000000 +0000 @@ -0,0 +1,200 @@ +"""XML Parser based (loosely) on the XML Spec's EBNF + +This is a hand-coded parser based on the W3C's XML specification, +there was a lot of busy-work rewriting to make the syntax agree, +but also a number of signficant structural changes required by +the limitations of the SimpleParse engine, and the completely +procedural definition of References in the XML spec (the References +don't occur in most places they can occur, and they are seen as +altering the buffer directly as soon as they are encountered, this +isn't something that fits readily into the mx.TextTools engine. + +http://www.w3.org/TR/REC-xml#sec-references + +Major Deviations from Spec: + No support for the unicode-style character classes + No support for UTF-16 (or Unicode at all, for that matter) + No support for References that alter the production + being parsed, so you can't have a Reference to an + item "and" or similar non-structure- + respecting References. References have + particular locations they can occur, and they are + just ignored elsewhere + No support for parsing the contents of References within + the primary parsing pass + No support for excluded start/end tags + Comments allowed in both tags and declarations (but not + inside content-specifiers). + Allows end tags of the form +""" + +declaration = """ + +# Simple (changable) literals +# These should be chosen based on the encoding +# of the file, which is actually embedded in the +# file :( + + := [\x20\x09\x0D\x0A]+ + := [a-zA-Z] + := letter/[_:] + := letter/digit/[-._:] + + +# don't change for XML, but would change for SGML or HTML + := '=' + := '&' + := '%' + := ';' + := ' := '?>' + := '<' + := '>' + := ' := '>' + := '/>' + + +# an XML-comment, note that this follows +# SGML semantics, so that you can embed comment_sets +# in the middle of the various declarations... +>Comment< := "" +>comment_set< := '--', xml_comment,'--' +xml_comment := -'--'* + +# whitespace in tag (including possible comment) +>TS< := (Comment/S)+ + + +# general structures +AttValue := ('"', (Reference/ -[&"] )*, '"') / ( "'", (Reference / -[&'])*, "'") + +# Names +Name := namestart, namechar* +Names := Name, (S,Name)* +Nmtoken := namechar+ +Nmtokens := Nmtoken, (S,Nmtoken)* + +# processing instructions +PI := PIO, PITarget, S?, PIContent, PIC +PIContent := -PIC* +PITarget := ?-( [Xx],[Mm],[Ll]), Name + + +## references + # character reference + CharRef := REFO,'#',('x',hex)/(int),REFC + # entity reference + EntityRef := REFO, Name, REFC + # parsed entity ref + PEReference := PREFO, Name, REFC + +Reference := EntityRef / CharRef + +Misc := Comment/S + +### PROLOG definitions... + + prolog := XMLDecl?, Misc*, (doctypedecl, Misc*)? 
+ XMLDecl := '' + VersionInfo := TS?, 'version', TS?, Eq, TS?, (('"',VersionNum,'"')/("'",VersionNum,"'")) + VersionNum := [a-zA-Z0-9_.:-]+ + + +### Document-type declarations (DTDs) + + doctypedecl := '' + + DeclSep := PEReference / S + markupdecl := elementdecl / AttlistDecl / EntityDecl / NotationDecl / PI / Comment + + EncodingDecl := TS, 'encoding', Eq, (('"', EncName, '"') / ("'", EncName, "'") ) + EncName := [A-Za-z],[A-Za-z0-9._-]* + SDDecl := TS, 'standalone', Eq, (("'", ('yes' / 'no'), "'") / ('"', ('yes' / 'no'), '"')) + + ExternalID := ('SYSTEM', TS?, SystemLiteral) / ('PUBLIC', TS?, PubidLiteral, TS?, SystemLiteral ) / PEReference + NDataDecl := (TS, 'NDATA', TS, Name)/ (TS,PEReference,TS,(Name/ PEReference)?) + + SystemLiteral := ('"', -["]*, '"') / ("'", -[']*, "'") / PEReference + PubidLiteral := ('"', [\x20\x0D\x0Aa-zA-Z0-9'()+,./:=?;!*#@$_%-]*, '"') / ("'", [\x20\x0D\x0Aa-zA-Z0-9()+,./:=?;!*#@$_%-]*, "'") / PEReference + + PublicID := ('PUBLIC', TS, PubidLiteral) / PEReference + + +### Element-type declarations + # hack to try and get PEReference parsing for the "normal case" + # where the PEReference doesn't change the production level, which + # seems to be suggested by the spec... + + elementdecl := '' + + >elementdecl_pe< := (TS, PEReference, TS?, contentspec?) + + contentspec := 'EMPTY' / 'ANY' / Mixed / children + Mixed := ('(', S?, '#PCDATA', (S?, '|', S?, (Name/PEReference))*, S?, ')*' ) /('(', S?, '#PCDATA', S?, ')') + + repetition_specifier := ('?' / '*' / '+')? + children := (choice / seq/ PEReference), repetition_specifier + cp := (choice / seq / Name/ PEReference ), repetition_specifier + choice := '(', S?, cp, ( S?, '|', S?, cp )+, S?, ')' + seq := '(', S?, cp, ( S?, ',', S?, cp )*, S?, ')' + + +### Attribute list declarations... + AttlistDecl := '' + AttDef := TS, ((Name, TS, AttType, TS, DefaultDecl)/(PEReference, TS?, AttType?, TS?, DefaultDecl?)) + + + AttType := StringType / TokenizedType / EnumeratedType/ PEReference + StringType := 'CDATA' + TokenizedType := 'ID' / 'IDREF' / 'IDREFS' / 'ENTITY' / 'ENTITIES' / 'NMTOKEN' / 'NMTOKENS' + EnumeratedType := NotationType / Enumeration + NotationType := 'NOTATION', TS, ('(', NameOrList, ')')/PEReference + Enumeration := '(', (NmTokenOrList/PEReference), ')' + + >NameOrList< := S?, (Name/PEReference), (S?, '|', S?, (Name/PEReference))*, S? + >NmTokenOrList< := S?, (Nmtoken/PEReference), (S?, '|', S?, (Nmtoken/PEReference))*, S? + + + DefaultDecl := '#REQUIRED' / '#IMPLIED' / ((('#FIXED', TS)/PEReference)?, (AttValue/PEReference)) / PEReference + +### Entity declarations + EntityDecl := GEDecl / PEDecl + GEDecl := '' + PEDecl := '' + EntityDef := EntityValue / (ExternalID, NDataDecl?) / PEReference + PEDef := EntityValue / ExternalID / PEReference + EntityValue := ('"', (PEReference / Reference / -[%&"])*, '"') / ("'", (PEReference / Reference / -[%&'])*, "'") + +NotationDecl := '' + +### elements (nodes/tags/you-know :) ) + # limitations in the SimpleParse engine mean that this + # particular structure will be basically useless... + element := EmptyElemTag / (STag, content, ETag) + + EmptyElemTag := STagO, Name, (TS, Attribute)*, TS?, EmptyElemTagC + + STag := STagO, Name, (TS, Attribute)*, TS?, STagC + ETag := ETagO, Name?, TS?, ETagC + + content := (element / Reference / CDSect / PI / Comment / CharData)* + + Attribute := (Name, Eq, (AttValue/Reference))/(Reference,(Eq,(AttValue/Reference))?) 
+ + # general content of an element + CharData := ( -[<&]+ / -(STag / EmptyElemTag / ETag / Reference / CDSect / PI / Comment) )+ + + # special non-parsed character data sections + CDSect := CDStart, CData, CDEnd + := ' := ']]>' + + +document := prolog, element, Misc* +""" +from simpleparse.common import numbers, strings, chartypes diff -Nru simpleparse-2.1.0a1/SimpleParse.egg-info/dependency_links.txt simpleparse-2.2.0/SimpleParse.egg-info/dependency_links.txt --- simpleparse-2.1.0a1/SimpleParse.egg-info/dependency_links.txt 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/SimpleParse.egg-info/dependency_links.txt 2015-11-11 19:05:34.000000000 +0000 @@ -0,0 +1 @@ + diff -Nru simpleparse-2.1.0a1/SimpleParse.egg-info/PKG-INFO simpleparse-2.2.0/SimpleParse.egg-info/PKG-INFO --- simpleparse-2.1.0a1/SimpleParse.egg-info/PKG-INFO 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/SimpleParse.egg-info/PKG-INFO 2015-11-11 19:05:34.000000000 +0000 @@ -0,0 +1,22 @@ +Metadata-Version: 1.1 +Name: SimpleParse +Version: 2.2.0 +Summary: A Parser Generator for Python (w/mxTextTools derivative) +Home-page: http://simpleparse.sourceforge.net/ +Author: Mike C. Fletcher +Author-email: mcfletch@users.sourceforge.net +License: UNKNOWN +Description: A Parser Generator for Python (w/mxTextTools derivative) + + Provides a moderately fast parser generator for use with Python, + includes a forked version of the mxTextTools text-processing library + modified to eliminate recursive operation and fix a number of + undesirable behaviours. + + Converts EBNF grammars directly to single-pass parsers for many + largely deterministic grammars. +Keywords: parse,parser,parsing,text,ebnf,grammar,generator +Platform: Any +Classifier: Programming Language :: Python +Classifier: Topic :: Software Development :: Libraries :: Python Modules +Classifier: Intended Audience :: Developers diff -Nru simpleparse-2.1.0a1/SimpleParse.egg-info/SOURCES.txt simpleparse-2.2.0/SimpleParse.egg-info/SOURCES.txt --- simpleparse-2.1.0a1/SimpleParse.egg-info/SOURCES.txt 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/SimpleParse.egg-info/SOURCES.txt 2015-11-11 19:05:34.000000000 +0000 @@ -0,0 +1,95 @@ +MANIFEST.in +license.txt +setup.py +tox.ini +/home/mcfletch/OpenGL-dev/simpleparse/simpleparse/stt/TextTools/mxTextTools/mxTextTools.c +/home/mcfletch/OpenGL-dev/simpleparse/simpleparse/stt/TextTools/mxTextTools/mxbmse.c +/home/mcfletch/OpenGL-dev/simpleparse/simpleparse/stt/TextTools/mxTextTools/mxte.c +SimpleParse.egg-info/PKG-INFO +SimpleParse.egg-info/SOURCES.txt +SimpleParse.egg-info/dependency_links.txt +SimpleParse.egg-info/top_level.txt +doc/common_problems.html +doc/index.html +doc/mxLicense.html +doc/processing_result_trees.html +doc/scanning_with_simpleparse.html +doc/simpleparse_grammars.html +doc/sitestyle.css +simpleparse/__init__.py +simpleparse/baseparser.py +simpleparse/dispatchprocessor.py +simpleparse/error.py +simpleparse/generator.py +simpleparse/objectgenerator.py +simpleparse/parser.py +simpleparse/printers.py +simpleparse/processor.py +simpleparse/simpleparsegrammar.py +simpleparse/common/__init__.py +simpleparse/common/calendar_names.py +simpleparse/common/chartypes.py +simpleparse/common/comments.py +simpleparse/common/iso_date.py +simpleparse/common/iso_date_loose.py +simpleparse/common/numbers.py +simpleparse/common/phonetics.py +simpleparse/common/strings.py +simpleparse/common/timezone_names.py +simpleparse/stt/COPYRIGHT +simpleparse/stt/LICENSE +simpleparse/stt/__init__.py 
+simpleparse/stt/mxLicense.html +simpleparse/stt/Doc/eGenix-mx-Extensions.html +simpleparse/stt/Doc/mxLicense.html +simpleparse/stt/Doc/mxTextTools.html +simpleparse/stt/TextTools/COPYRIGHT +simpleparse/stt/TextTools/LICENSE +simpleparse/stt/TextTools/Makefile.pkg +simpleparse/stt/TextTools/README +simpleparse/stt/TextTools/TextTools.py +simpleparse/stt/TextTools/__init__.py +simpleparse/stt/TextTools/Constants/Sets.py +simpleparse/stt/TextTools/Constants/TagTables.py +simpleparse/stt/TextTools/Constants/__init__.py +simpleparse/stt/TextTools/mxTextTools/Makefile.pre.in +simpleparse/stt/TextTools/mxTextTools/__init__.py +simpleparse/stt/TextTools/mxTextTools/highcommands.h +simpleparse/stt/TextTools/mxTextTools/lowlevelcommands.h +simpleparse/stt/TextTools/mxTextTools/mx.h +simpleparse/stt/TextTools/mxTextTools/mxTextTools.c +simpleparse/stt/TextTools/mxTextTools/mxTextTools.c.~1~ +simpleparse/stt/TextTools/mxTextTools/mxTextTools.def +simpleparse/stt/TextTools/mxTextTools/mxTextTools.h +simpleparse/stt/TextTools/mxTextTools/mxbmse.c +simpleparse/stt/TextTools/mxTextTools/mxbmse.h +simpleparse/stt/TextTools/mxTextTools/mxh.h +simpleparse/stt/TextTools/mxTextTools/mxpyapi.h +simpleparse/stt/TextTools/mxTextTools/mxstdlib.h +simpleparse/stt/TextTools/mxTextTools/mxte.c +simpleparse/stt/TextTools/mxTextTools/mxte_impl.h +simpleparse/stt/TextTools/mxTextTools/recursecommands.h +simpleparse/stt/TextTools/mxTextTools/speccommands.h +simpleparse/xmlparser/__init__.py +simpleparse/xmlparser/xml_parser.py +tests/__init__.py +tests/genericvalues.py +tests/mx_flag.py +tests/mx_high.py +tests/mx_low.py +tests/mx_recursive.py +tests/mx_special.py +tests/test_backup_on_subtable_failure.py +tests/test_common_chartypes.py +tests/test_common_comments.py +tests/test_common_iso_date.py +tests/test_common_numbers.py +tests/test_common_strings.py +tests/test_deep_nesting.py +tests/test_erroronfail.py +tests/test_grammarparser.py +tests/test_objectgenerator.py +tests/test_optimisation.py +tests/test_printers.py +tests/test_simpleparsegrammar.py +tests/test_xml.py \ No newline at end of file diff -Nru simpleparse-2.1.0a1/SimpleParse.egg-info/top_level.txt simpleparse-2.2.0/SimpleParse.egg-info/top_level.txt --- simpleparse-2.1.0a1/SimpleParse.egg-info/top_level.txt 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/SimpleParse.egg-info/top_level.txt 2015-11-11 19:05:34.000000000 +0000 @@ -0,0 +1 @@ +simpleparse diff -Nru simpleparse-2.1.0a1/simpleparsegrammar.py simpleparse-2.2.0/simpleparsegrammar.py --- simpleparse-2.1.0a1/simpleparsegrammar.py 2006-02-19 00:46:13.000000000 +0000 +++ simpleparse-2.2.0/simpleparsegrammar.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,701 +0,0 @@ -'''Default SimpleParse EBNF grammar as a generator with productions - -This module defines the original SimpleParse -grammar. It uses the generator objects directly -as this is the first grammar being written. -''' -from simpleparse.objectgenerator import * -from simpleparse import generator, baseparser -import string -from simpleparse.dispatchprocessor import * -try: - unicode - HAVE_UNICODE = 1 -except NameError: - HAVE_UNICODE = 0 - -# note that whitespace is slightly different -# due to a bug with NULL-matching repeating groups -# we make all the ts references ts? 
-whitespace = Name (value = "ts", report = 0) -element_token = Name( value = "element_token" ) -literal = Name ( value = "literal") -group = Name ( value = "group") -characterrange = Name ( value = "range") -name = Name ( value = "name") - - -SPGenerator = generator.Generator () - -SPGenerator.addDefinition( - "declarationset", - Name (value = "declaration", repeating = 1), -) - - - -SPGenerator.addDefinition ( - "declaration", - SequentialGroup ( - children = [ - whitespace, - FirstOfGroup ( - children = [ - Name (value = "unreportedname", ), - Name (value = "expandedname", ), - Name (value = "name", ), - ], - ), - whitespace, - Literal (value = ":"), - Literal (value = ":", optional=1), - Literal (value = "=",), - Name( value = "seq_group"), - ], - ) -) - -SPGenerator.addDefinition ( - "group", - SequentialGroup ( - children = [ - Literal (value ="("), - Name( value= "seq_group"), - Literal (value =")"), - ], - expanded = 1, - ) -) - -_seq_children = FirstOfGroup( - children = [ - Name(value="error_on_fail"), - Name(value="fo_group"), - Name(value="element_token"), - ], -) - -SPGenerator.addDefinition ( - "seq_group", - SequentialGroup ( - children = [ - whitespace, - _seq_children, - SequentialGroup( - children = [ - whitespace, - Name( value="seq_indicator"), - whitespace, - _seq_children, - ], - repeating = 1, optional = 1, - ), - whitespace, - ], - ), -) - -SPGenerator.addDefinition ( - "fo_group", - SequentialGroup ( - children = [ - element_token, - SequentialGroup( - children = [ - whitespace, - Name( value="fo_indicator"), - whitespace, - element_token, - ], - repeating = 1, - ), - ], - ) -) -SPGenerator.addDefinition ( - "seq_indicator", - Literal(value = ",", report=0 ), -) -SPGenerator.addDefinition ( - "fo_indicator", - Literal(value = "/", report=0 ), -) - -SPGenerator.addDefinition ( - "element_token", - SequentialGroup ( - children = [ - Name (value = "lookahead_indicator", optional = 1), - whitespace, - Name (value = "negpos_indicator", optional = 1), - whitespace, - FirstOfGroup ( - children = [ - literal, - characterrange, - group, - name, - ] - ), - whitespace, - Name (value = "occurence_indicator", optional = 1), - whitespace, - Name (value = "error_on_fail", optional = 1), - ] - ) -) - -SPGenerator.addDefinition ( - "negpos_indicator", - Range (value = "+-" ) -) -SPGenerator.addDefinition ( - "lookahead_indicator", - Literal(value = "?" ), -) - -SPGenerator.addDefinition ( - "occurence_indicator", - Range (value = "+*?" 
), -) -SPGenerator.addDefinition ( - "error_on_fail", - SequentialGroup ( - children = [ - Literal (value ="!"), - SequentialGroup ( - children = [ - whitespace, - Name( value="literal"), - ], - optional = 1, - ), - ], - ), -) - -SPGenerator.addDefinition ( - "unreportedname", - SequentialGroup ( - children = [ - Literal (value ="<"), - whitespace, - name, - whitespace, - Literal (value =">"), - ] - ) -) -SPGenerator.addDefinition ( - "expandedname", - SequentialGroup ( - children = [ - Literal (value =">"), - whitespace, - name, - whitespace, - Literal (value ="<"), - ] - ) -) - -SPGenerator.addDefinition ( - "name", - SequentialGroup ( - children = [ - Range(value ='abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'), - Range(value ='abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789', optional= 1, repeating= 1), - ] - ) -) - -SPGenerator.addDefinition ( - "ts", # ( [ \011-\015]+ / ('#',-'\n'+,'\n')+ )* - FirstOfGroup ( - children = [ - Range(value =' \011\012\013\014\015', repeating=1), - Name( value = "comment" ), - ], - repeating = 1, optional=1, - ) -) -SPGenerator.addDefinition ( - "comment", # ( [ \011-\015]+ / ('#',-'\n'+,'\n')+ )* - SequentialGroup ( - children = [ - Literal ( value ="#"), - Literal (value ="\n", negative = 1, repeating = 1, optional=1), - Literal (value = "\n",), - ], - ), -) - -SPGenerator.addDefinition ( - "literalDecorator", # literalDecorator := [c] - Range( value = 'c' ) -) - -SPGenerator.addDefinition ( - "literal", # ("'",(CHARNOSNGLQUOTE/ESCAPEDCHAR)*,"'") / ('"',(CHARNODBLQUOTE/ESCAPEDCHAR)*,'"') - SequentialGroup( - children = [ - Name( value = 'literalDecorator', optional=1 ), - FirstOfGroup ( - children = [ - SequentialGroup ( - children = [ - Literal (value ="'"), - FirstOfGroup ( - children = [ - Name (value = "CHARNOSNGLQUOTE"), - Name (value = "ESCAPEDCHAR"), - ], - optional = 1, repeating = 1, - ), - Literal (value ="'"), - ], - ), - SequentialGroup ( - children = [ - Literal (value ='"'), - FirstOfGroup ( - children = [ - Name (value = "CHARNODBLQUOTE"), - Name (value = "ESCAPEDCHAR"), - ], - optional = 1, repeating = 1, - ), - Literal (value ='"'), - ], - ) - ], - ), - ], - ) -) - -SPGenerator.addDefinition ( - "range", # '[',CHARBRACE?,CHARDASH?, (CHARRANGE/CHARNOBRACE)*, CHARDASH?,']' - SequentialGroup ( - children =[ - Literal (value ="["), - Name (value ="CHARBRACE",optional = 1), - Name (value ="CHARDASH",optional = 1), - FirstOfGroup( - children = [ - Name (value ="CHARRANGE"), - Name (value ="CHARNOBRACE"), - ], - optional = 1, repeating = 1, - ), - Name (value ="CHARDASH",optional = 1), - Literal (value ="]"), - ], - ) -) -SPGenerator.addDefinition ( - "CHARBRACE", - Literal (value = "]"), -) -SPGenerator.addDefinition ( - "CHARDASH", - Literal (value = "-"), -) -SPGenerator.addDefinition ( - "CHARRANGE", # CHARRANGE := CHARNOBRACE, '-', CHARNOBRACE - SequentialGroup ( - children =[ - Name (value ="CHARNOBRACE"), - Literal (value ="-"), - Name (value ="CHARNOBRACE"), - ], - ), -) -SPGenerator.addDefinition ( - "CHARNOBRACE", # CHARRANGE := CHARNOBRACE, '-', CHARNOBRACE - FirstOfGroup( - children =[ - Name (value ="ESCAPEDCHAR"), - Name (value ="CHAR"), - ], - ), -) -SPGenerator.addDefinition ( - "CHAR", - Literal ( - value ="]", - negative = 1, - ), -) - -SPGenerator.addDefinition ( - "ESCAPEDCHAR", # '\\',( SPECIALESCAPEDCHAR / ('x',HEXESCAPEDCHAR) / OCTALESCAPEDCHAR ) - SequentialGroup ( - children =[ - Literal (value ="\\"), - FirstOfGroup( - children = [ - Name (value ="SPECIALESCAPEDCHAR"), - SequentialGroup( - 
children = [ - Range( value = 'xX' ), - Name( value="HEXESCAPEDCHAR"), - ] - ), - Name (value ="OCTALESCAPEDCHAR"), - ], - ), - ], - ) -) - -SPGenerator.addDefinition ( - "SPECIALESCAPEDCHAR", - Range(value ='\\abfnrtv"\''), -) - -SPGenerator.addDefinition ( - "OCTALESCAPEDCHAR", # [0-7],[0-7]?,[0-7]? - SequentialGroup ( - children =[ - Range (value ="01234567"), - Range (value ="01234567", optional = 1), - Range (value ="01234567", optional = 1), - ], - ) -) -SPGenerator.addDefinition ( - "HEXESCAPEDCHAR", # [0-9a-fA-F],[0-9a-fA-F] - SequentialGroup ( - children =[ - Range (value ="0123456789abcdefABCDEF"), - Range (value ="0123456789abcdefABCDEF"), - ], - ) -) - - -SPGenerator.addDefinition ( - "CHARNODBLQUOTE", - Range(value ='\\"', negative = 1, repeating = 1), -) -SPGenerator.addDefinition ( - "CHARNOSNGLQUOTE", - Range(value ="\\'", negative = 1, repeating = 1), -) - -declaration = r"""declarationset := declaration+ -declaration := ts, (unreportedname/expandedname/name) ,ts,':',':'?,'=',seq_group - -element_token := lookahead_indicator?, ts, negpos_indicator?,ts, (literal/range/group/name),ts, occurence_indicator?, ts, error_on_fail? - -negpos_indicator := [-+] -lookahead_indicator := "?" -occurence_indicator := [+*?] -error_on_fail := "!", (ts,literal)? - ->group< := '(',seq_group, ')' -seq_group := ts,(error_on_fail/fo_group/element_token), - (ts, seq_indicator, ts, - (error_on_fail/fo_group/element_token) - )*, ts - -fo_group := element_token, (ts, fo_indicator, ts, element_token)+ - - -# following two are likely something peoples might want to -# replace in many instances... - := "/" - := ',' - -unreportedname := '<', name, '>' -expandedname := '>', name, '<' -name := [a-zA-Z_],[a-zA-Z0-9_]* - := ( [ \011-\015]+ / comment )* -comment := '#',-'\n'*,'\n' -literal := literalDecorator?,("'",(CHARNOSNGLQUOTE/ESCAPEDCHAR)*,"'") / ('"',(CHARNODBLQUOTE/ESCAPEDCHAR)*,'"') -literalDecorator := [c] - - - -range := '[',CHARBRACE?,CHARDASH?, (CHARRANGE/CHARNOBRACE)*, CHARDASH?,']' -CHARBRACE := ']' -CHARDASH := '-' -CHARRANGE := CHARNOBRACE, '-', CHARNOBRACE -CHARNOBRACE := ESCAPEDCHAR/CHAR -CHAR := -[]] -ESCAPEDCHAR := '\\',( SPECIALESCAPEDCHAR / ('x',HEXESCAPEDCHAR) / ("u",UNICODEESCAPEDCHAR_16) /("U",UNICODEESCAPEDCHAR_32)/OCTALESCAPEDCHAR ) -SPECIALESCAPEDCHAR := [\\abfnrtv"'] -OCTALESCAPEDCHAR := [0-7],[0-7]?,[0-7]? -HEXESCAPEDCHAR := [0-9a-fA-F],[0-9a-fA-F] -CHARNODBLQUOTE := -[\\"]+ -CHARNOSNGLQUOTE := -[\\']+ -UNICODEESCAPEDCHAR_16 := [0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F] -UNICODEESCAPEDCHAR_32 := [0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F] -""" - -### Now the interpreter objects... -class Parser(baseparser.BaseParser): - """Parser which generates new parsers from EBNF grammars - - This parser class allows you to pass in an EBNF grammar as - the initialisation parameter. The EBNF is processed, and a - SimpleParse generator object is created as self.generator. - - Unlike most Parsers, this object is intended to be re-created - for each bit of data it parses (i.e. each EBNF), so it warps - the standard API a lot. 
- """ - _rootProduction = 'declarationset' - def __init__( self, ebnf, prebuilts=(), methodSource=None, definitionSources=() ): - """Create a new generator based on the EBNF in simpleparse format""" - processor = SPGrammarProcessor( prebuilts, definitionSources ) - success, tags, next = self.parse( ebnf, self._rootProduction, processor=processor ) - if next != len(ebnf): - lineNumber = lines(0, next, ebnf) - raise ValueError( - """Unable to complete parsing of the EBNF, stopped at line %s (%s chars of %s) -Unparsed:\n%s..."""%(lineNumber, next, len(ebnf), ebnf[next:next+100]) - ) - self.generator = processor.generator - def buildTagger( self, name=None, processor = None ): - """Build the tag-table for parsing the EBNF for this parser""" - return SPGenerator.buildParser( name, processor ) - -class SPGrammarProcessor( DispatchProcessor ): - """Processing object for post-processing an EBNF into a new generator""" - ### top level - def __init__( self, prebuilts=(), definitionSources=() ): - """Create a new generator based on the EBNF in simpleparse format""" - self.generator = generator.Generator() - for (name, table) in prebuilts: - if isinstance( table, ElementToken): - self.generator.addDefinition( name, table) - else: - self.generator.addDefinition( name, Prebuilt(value=table)) - for source in definitionSources: - self.generator.addDefinitionSource( source ) - - def declaration( self, (tag, left, right, sublist), buffer): - '''Base declaration from the grammar, a "production" or "rule"''' - name = sublist[0] - expanded = 0 - if name[0] == "unreportedname": - name = name[3][0] - # note that the info is stored in the wrong place :( - report = 0 - elif name[0] == 'expandedname': - report = 1 - expanded = 1 - name = name[3][0] - else: - report = 1 - name = getString( name, buffer ) - self.currentProduction = name - content = dispatch( self, sublist[1], buffer ) - content.report = report - content.expanded = expanded - self.generator.addDefinition( - name, - content, - ) - del self.currentProduction - - ### element configuration - def element_token( self, (tag, left, right, sublist), buffer): - '''get the children, then configure''' - base = None - negative = 0 - optional = 0 - repeating = 0 - lookahead = 0 - errorOnFail = None - for tup in sublist: - result = dispatch( self, tup, buffer ) - if tup[0] == 'negpos_indicator': - negative = result - elif tup[0] == 'occurence_indicator': - optional, repeating = result - elif tup[0] == 'lookahead_indicator': - lookahead = result - elif tup[0] == 'error_on_fail': - # we do some extra work here - errorOnFail = result - self._config_error_on_fail( errorOnFail, (tag,left,tup[1],[]), buffer ) - else: - base = result - base.optional = optional - base.negative = negative - base.repeating = repeating - base.lookahead = lookahead - if errorOnFail: - base.errorOnFail = errorOnFail - return base - - ### generator-node-builders - def seq_group( self, (tag, left, right, sublist), buffer): - """Process a sequential-group into a SequentialGroup element token""" - children = dispatchList( self, sublist, buffer ) - errorOnFail = None - result = [] - for (item,tup) in map(None,children,sublist): - if isinstance( item, ErrorOnFail ): - errorOnFail = item - else: - if errorOnFail: - item.errorOnFail = errorOnFail.copy() - self._config_error_on_fail( - item.errorOnFail, - tup, - buffer - ) - result.append( item ) - if len(result) == 1: - # single-item sequential group (very common) - return result[0] - elif not result: - raise ValueError( """SequentialGroup on line %s 
doesn't have an element-token child! grammar was %s"""%( lines(0,left, buffer), buffer[left:left+25])) - base = SequentialGroup( - children = result, - ) - return base - def fo_group( self, (tag, left, right, sublist), buffer): - """Process a first-of-group into a FirstOf element token""" - children = dispatchList( self, sublist, buffer ) - if len(children) == 1: - # this should never happen, but if it does, we can deal with it I suppose... - return children[0] - base = FirstOfGroup( - children = children - ) - return base - - def literal( self, (tag, left, right, sublist), buffer): - '''Turn a literal result into a literal generator''' - if sublist and sublist[0][0] == 'literalDecorator': - # right now only have the one decorator... - sublist = sublist[1:] - classObject = CILiteral - else: - classObject = Literal - elements = dispatchList( self, sublist, buffer) - ### Should check for CILiteral with non-CI string or single-character value! - return classObject( value = string.join(elements, "" ) ) - - def range( self, (tag, left, right, sublist), buffer): -## if hasattr( Range, 'requiresExpandedSet') and Range.requiresExpandedSet: - return Range( - value = string.join(dispatchList( self, sublist, buffer),''), - ) -## else: -## # need to build up a new-syntax version of the range... -## # escape ^ to \^ -## # escape \ to \\ -## # escape - to \- -## # make sure range-sets are in proper order... -## raise NotImplementedError( """Haven't got the new CharSet version implemented yet""") - def name( self, tup, buffer): - return Name( - value = getString(tup, buffer), - ) - ### simple translators - occurenceIndicatorMap = { - '*': (1,1), - '+': (0,1), - '?': (1,0), - } - def occurence_indicator( self, tup, buffer): - '''Return optional, repeating as a tuple of true/false values''' - value = getString(tup, buffer) - return self.occurenceIndicatorMap[value] - def lookahead_indicator( self, tup, buffer ): - """If present, the lookahead indictor just says "yes", so just return 1""" - return 1 - def error_on_fail( self, (tag,left,right,children), buffer ): - """If present, we are going to make the current object an errorOnFail type, - - If there's a string literal child, then we use it to create the - "message" attribute of the errorOnFail object. - """ - err = ErrorOnFail() - if children: - (tag,left,right,children) = children[0] - message = string.join( dispatchList( self, children, buffer), "") - err.message = message - return err - def _config_error_on_fail( self, errorOnFail, tup, buffer ): - """Configure an error-on-fail instance for a given child tuple""" - # what we expected to find... 
- errorOnFail.expected = buffer[tup[1]:tup[2]] - if hasattr( self, "currentProduction"): - errorOnFail.production = self.currentProduction - - - negposIndicatorMap = { - '+': 0, - '-': 1, - } - def negpos_indicator( self, tup, buffer ): - '''return whether indicates negative''' - value = getString(tup, buffer) - return self.negposIndicatorMap[value] - - def CHARNODBLQUOTE( self, tup, buffer): - return getString(tup, buffer) - CHAR = CHARNOSNGLQUOTE = CHARNODBLQUOTE - def ESCAPEDCHAR( self, (tag, left, right, sublist), buffer): - return string.join(dispatchList( self, sublist, buffer), "") - specialescapedmap = { - 'a':'\a', - 'b':'\b', - 'f':'\f', - 'n':'\n', - 'r':'\r', - 't':'\t', - 'v':'\v', - '\\':'\\', - '"':'"', - "'":"'", - } - def SPECIALESCAPEDCHAR( self, tup, buffer): - return self.specialescapedmap[ getString(tup, buffer)] - def OCTALESCAPEDCHAR(self, tup, buffer): - return chr(string.atoi( getString(tup, buffer), 8 )) - def HEXESCAPEDCHAR( self, tup , buffer): - return chr(string.atoi( getString(tup, buffer), 16 )) - def CHARNOBRACE( self, (tag, left, right, sublist), buffer): - return string.join(dispatchList( self, sublist, buffer), "") - def CHARRANGE( self, (tag, left, right, sublist), buffer): - '''Create a string from first to second item''' - # following should never raise an error, as there's only one possible format... - try: - first, second = map( ord, dispatchList( self, sublist, buffer)) - except TypeError: - import pdb - pdb.set_trace () - if second < first: - second, first = first, second - return string.join(map( chr, range(first, second+1),), '') - def CHARDASH( self, tup , buffer): - return '-' - def CHARBRACE( self, tup , buffer): - return ']' - - if HAVE_UNICODE: - def UNICODEESCAPEDCHAR_16( self, (tag, left, right, sublist), buffer): - """Only available in unicode-aware Python versions""" - char = unichr(int( buffer[left:right], 16 )) - return char - ### Only available in wide-unicode Python versions (rare) - UNICODEESCAPEDCHAR_32 = UNICODEESCAPEDCHAR_16 - else: - # ignore unicode-specific characters, though this isn't a particularly - # useful approach, I don't see a better option at the moment... - def UNICODEESCAPEDCHAR_16( self, (tag, left, right, sublist), buffer): - """Only available in unicode-aware Python versions""" - return "" - - def UNICODEESCAPEDCHAR_32( self, (tag, left, right, sublist), buffer): - """Only available in wide-unicode Python versions (rare)""" - return "" - diff -Nru simpleparse-2.1.0a1/stt/COPYRIGHT simpleparse-2.2.0/stt/COPYRIGHT --- simpleparse-2.1.0a1/stt/COPYRIGHT 2006-02-18 23:32:36.000000000 +0000 +++ simpleparse-2.2.0/stt/COPYRIGHT 1970-01-01 00:00:00.000000000 +0000 @@ -1,11 +0,0 @@ - --------------------------------------------------------------------- - COPYRIGHT NOTICE - --------------------------------------------------------------------- - Copyright (c) 1997-2000, Marc-Andre Lemburg; mailto:mal@lemburg.com - Copyright (c) 2000-2001, eGenix.com Software GmbH; mailto:info@egenix.com - Copyright (c) 2003-2006, Mike Fletcher; mailto:mcfletch@vrplumber.com - - All Rights Reserved. - - This software is protected by copyright and other intellectual - property laws and by international treaties. 
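The simpleparsegrammar.py module removed above is SimpleParse's grammar-for-grammars: its declaration text defines the EBNF syntax user grammars are written in (productions such as name := [a-zA-Z_],[a-zA-Z0-9_]*, occurrence indicators +*?, unreported and expanded name forms), and SPGrammarProcessor turns a parsed declaration into generator objects. The sketch below shows how such a declaration is typically consumed through the user-facing simpleparse.parser.Parser listed in SOURCES.txt; the small key/value grammar and the sample input are invented for illustration, and the comments describe the expected rather than guaranteed result shape.

    from simpleparse.parser import Parser

    # An illustrative declaration written in the EBNF syntax defined by
    # simpleparsegrammar.py (these production names are made up for this sketch).
    declaration = r'''
    assignments := assignment, (',', assignment)*
    assignment  := key, '=', value
    key         := [a-zA-Z_], [a-zA-Z0-9_]*
    value       := [0-9]+
    '''

    parser = Parser(declaration, "assignments")
    success, children, next_char = parser.parse("alpha=1,beta=22")
    # success should be 1 (match), children the nested result-tree list,
    # and next_char the index just past the parsed region (here 15).

Post-processing of the result tree would normally go through a DispatchProcessor (simpleparse/dispatchprocessor.py above), in the same style that SPGrammarProcessor uses for the EBNF itself.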
diff -Nru simpleparse-2.1.0a1/stt/Doc/eGenix-mx-Extensions.html simpleparse-2.2.0/stt/Doc/eGenix-mx-Extensions.html --- simpleparse-2.1.0a1/stt/Doc/eGenix-mx-Extensions.html 2006-02-18 23:33:31.000000000 +0000 +++ simpleparse-2.2.0/stt/Doc/eGenix-mx-Extensions.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,1708 +0,0 @@ - - - - eGenix.com mx Extensions for Python - - - - - - -
    eGenix.com mx Extensions for Python

    Introduction

      - The eGenix.com mx Extensions for Python are a collection of professional quality Python software tools which enhance Python's usability in many important areas such as ODBC database connectivity, fast text processing, date/time processing and web site programming.

      - The tools have a proven record of being portable across many Unix and Windows platforms, e.g. you can write applications which use an ODBC database on Windows which then run on Unix platforms without change due to the consistent platforms independent interfaces.

      - All of the available packages have shown their stability and usefulness in many mission critical applications and various commercial settings all around the world.

      - The two most well-known packages from the mx Extension Series are mxDateTime and mxODBC providing date/time services and professional ODBC database connectivity on practically all supported Python platforms. These two packages enable database software which is portable not only across platforms, but also across database backends.

    Overview

    Packages

      - The following subpackages are included in the eGenix.com mx Extension series, each providing fast and efficient implementations for various application domains. All subpackages live in the mx top-level Python package to avoid naming collisions with other Python software.

      - eGenix.com mx BASE Package:

        - mxDateTime - Generic Date/Time Datatypes
        - mxTextTools - Fast Text Processing Tools
        - mxStack - Fast and Memory-Efficient Stack Datatype
        - mxTools - Collection of Additional Builtins
        - mxProxy - Generic Object Proxy & Weak Reference Datatype
        - mxBeeBase - On-disk B+Tree Database Construction Kit
        - mxURL - Efficient Storage and Management of URL/URI Information
        - mxUID - Create and Manage Unique IDs

      - eGenix.com mx COMMERCIAL Package:

        - mxODBC - Python DB-API compatible ODBC 2.0 - 3.5 database interface; supports Python 1.5.2 and Unicode for Python 2.0 and later

      - eGenix.com mx EXPERIMENTAL Package:

        - mxNumber - Interface to GNU MP's High Precision Numerics
        - mxTidy - Interface to a library version of HTML Tidy

    eGenix.com mx Extensions - BASE Package (Version 2.0.3)

    eGenix.com mx Extensions - COMMERCIAL Package (Version 2.0.4)

    eGenix.com mx Extensions - EXPERIMENTAL Package (Version 0.7.0)

    Commercial Support

    - © 1997-2000, Copyright by Marc-André Lemburg; All Rights Reserved. mailto: mal@lemburg.com
    - © 2000-2001, Copyright by eGenix.com Software GmbH, Langenfeld; All Rights Reserved. mailto: info@egenix.com
    - Trademarks: "mx Extensions" is a trademark of Marc-Andre Lemburg and eGenix.com GmbH.
    - - - diff -Nru simpleparse-2.1.0a1/stt/Doc/mxLicense.html simpleparse-2.2.0/stt/Doc/mxLicense.html --- simpleparse-2.1.0a1/stt/Doc/mxLicense.html 2006-02-18 23:33:31.000000000 +0000 +++ simpleparse-2.2.0/stt/Doc/mxLicense.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,868 +0,0 @@ - - - - mx Extension Series - License Information - - - - - -
    -

    mx Extension Series - License Information


    Introduction

    - -
      - -

      - The mx Extensions Series packages are brought to you by the - eGenix.com Software, Skills and Services GmbH, Langenfeld, - Germany. We are licensing our products under the following - two different licenses: -

      - -

      - The Public License is very similar to the Python 2.0 - license and covers the open source software made available - by eGenix.com which is free of charge even for commercial - use. - -

      - The Commercial License is intended for covering - commercial eGenix.com software, notably the mxODBC - package. Only private and non-commercial use is free of - charge. - -

      - If you have questions regarding these licenses, please - contact Licenses@eGenix.com. - If you would like to bundle the software with your - commercial product, please write to Sales@eGenix.com - for more information about the redistribution conditions and - terms. - -

    - - - -

    eGenix.com Public License

    - -
      - -

      - The eGenix.com Public License is similar to the Python 2.0 - and considered an Open Source license (in the sense defined - by the Open Source - Initiative (OSI)) by eGenix.com. -

      - The license should also be compatible with the GNU Public License in case - that matters. The only part which is known to have caused - some problems with Richard Stallman in the past is the - choice of law clause. -

      -

      - - - - -
      -
      -________________________________________________________________________
      -
      -EGENIX.COM PUBLIC LICENSE AGREEMENT                        VERSION 1.1.0
      -________________________________________________________________________
      -
      -1.  Introduction
      -
      -    This "License Agreement" is between eGenix.com Software, Skills
      -    and Services GmbH ("eGenix.com"), having an office at
      -    Pastor-Loeh-Str. 48, D-40764 Langenfeld, Germany, and the
      -    Individual or Organization ("Licensee") accessing and otherwise
      -    using this software in source or binary form and its associated
      -    documentation ("the Software").
      -
      -2.  License 
      -
      -    Subject to the terms and conditions of this eGenix.com Public
      -    License Agreement, eGenix.com hereby grants Licensee a
      -    non-exclusive, royalty-free, world-wide license to reproduce,
      -    analyze, test, perform and/or display publicly, prepare derivative
      -    works, distribute, and otherwise use the Software alone or in any
      -    derivative version, provided, however, that the eGenix.com Public
      -    License Agreement is retained in the Software, or in any
      -    derivative version of the Software prepared by Licensee.
      -
      -3.  NO WARRANTY
      -
      -    eGenix.com is making the Software available to Licensee on an "AS
      -    IS" basis.  SUBJECT TO ANY STATUTORY WARRANTIES WHICH CAN NOT BE
      -    EXCLUDED, EGENIX.COM MAKES NO REPRESENTATIONS OR WARRANTIES,
      -    EXPRESS OR IMPLIED.  BY WAY OF EXAMPLE, BUT NOT LIMITATION,
      -    EGENIX.COM MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY
      -    OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT
      -    THE USE OF THE SOFTWARE WILL NOT INFRINGE ANY THIRD PARTY RIGHTS.
      -
      -4.  LIMITATION OF LIABILITY
      -
      -    EGENIX.COM SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF
      -    THE SOFTWARE FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES
      -    OR LOSS (INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF
      -    BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS
      -    INFORMATION, OR OTHER PECUNIARY LOSS) AS A RESULT OF USING,
      -    MODIFYING OR DISTRIBUTING THE SOFTWARE, OR ANY DERIVATIVE THEREOF,
      -    EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
      -
      -    SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION OR LIMITATION OF
      -    INCIDENTAL OR CONSEQUENTIAL DAMAGES, SO THE ABOVE EXCLUSION OR
      -    LIMITATION MAY NOT APPLY TO LICENSEE.
      -
      -5.  Termination
      -
      -    This License Agreement will automatically terminate upon a
      -    material breach of its terms and conditions.
      -
      -6.  Third Party Rights 
      -
      -    Any software or documentation in source or binary form provided
      -    along with the Software that is associated with a separate license
      -    agreement is licensed to Licensee under the terms of that license
      -    agreement. This License Agreement does not apply to those portions
      -    of the Software. Copies of the third party licenses are included
      -    in the Software Distribution.
      -
      -7.  General
      -
      -    Nothing in this License Agreement affects any statutory rights of
      -    consumers that cannot be waived or limited by contract.
      -
      -    Nothing in this License Agreement shall be deemed to create any
      -    relationship of agency, partnership, or joint venture between
      -    eGenix.com and Licensee.
      -
      -    If any provision of this License Agreement shall be unlawful,
      -    void, or for any reason unenforceable, such provision shall be
      -    modified to the extent necessary to render it enforceable without
      -    losing its intent, or, if no such modification is possible, be
      -    severed from this License Agreement and shall not affect the
      -    validity and enforceability of the remaining provisions of this
      -    License Agreement.
      -
      -    This License Agreement shall be governed by and interpreted in all
      -    respects by the law of Germany, excluding conflict of law
      -    provisions. It shall not be governed by the United Nations
      -    Convention on Contracts for International Sale of Goods.
      -
      -    This License Agreement does not grant permission to use eGenix.com
      -    trademarks or trade names in a trademark sense to endorse or
      -    promote products or services of Licensee, or any third party.
      -
      -    The controlling language of this License Agreement is English. If
      -    Licensee has received a translation into another language, it has
      -    been provided for Licensee's convenience only.
      -
      -8.  Agreement
      -
      -    By downloading, copying, installing or otherwise using the
      -    Software, Licensee agrees to be bound by the terms and conditions
      -    of this License Agreement.
      -
      -
      -    For question regarding this License Agreement, please write to:
      -
      -	      eGenix.com Software, Skills and Services GmbH
      -	      Pastor-Loeh-Str. 48
      -	      D-40764 Langenfeld
      -	      Germany
      -		    
      -
      -
      - -

      - -
    - - - -

    eGenix.com Commercial License

    - -
      - -

      - The eGenix.com Commercial License covers commercial - eGenix.com software, notably the mxODBC package. Only - private and non-commercial use is free of charge. -

      - Usage of the software in commercial settings such as for - implementing in-house applications in or for companies, - governments, for-profit organizations, etc. requires a - signed "Proof of Authorization" which can be bought from - eGenix.com in order to authorize this use. - -

      eGenix.com Commercial Licensing Models

      - -

      - We currently offer four models to choose from: -

      1. CPU License: per-installation licenses (both for commercial and non-commercial use)

      2. Developer CPU License: per-developer-seat licenses which allow redistribution

      3. Reseller Agreement: agreement which allows reselling the software to third parties for standalone use

      4. Product Reseller Agreement: agreement which allows reselling the software to third parties for use in a specific product only

      - The first two options are covered by the eGenix.com - Commercial License through the "Proof of Authorization" - forms we provide below. The two reseller options have to be - negotiated between the reseller and eGenix.com. Please - contact sales@eGenix.com if you - are interested in becoming an eGenix.com software reseller. - -

      - -

      -

      - - - - -
      -
      -________________________________________________________________________
      -
      -EGENIX.COM COMMERCIAL LICENSE AGREEMENT                    VERSION 1.1.0
      -________________________________________________________________________
      -
      -1.  Introduction
      -
      -    This "License Agreement" is between eGenix.com Software, Skills
      -    and Services GmbH ("eGenix.com"), having an office at
      -    Pastor-Loeh-Str. 48, D-40764 Langenfeld, Germany, and the
      -    Individual or Organization ("Licensee") accessing and otherwise
      -    using this software in source or binary form and its associated
      -    documentation ("the Software").
      -
      -2.  Terms and Definitions
      -
      -    The "Software" covered under this License Agreement includes
      -    without limitation, all object code, source code, help files,
      -    publications, documentation and other programs, products or tools
      -    that are included in the official "Software Distribution"
      -    available from eGenix.com.
      -
      -    The "Proof of Authorization" for the Software is a written and
      -    signed notice from eGenix.com providing evidence of the extent of
      -    authorizations the Licensee has acquired to use the Software and
      -    of Licensee's eligibility for future upgrade program prices (if
      -    announced) and potential special or promotional opportunities. As
      -    such, the Proof of Authorization becomes part of this License
      -    Agreement.
      -
      -    Installation of the Software ("Installation") refers to the
      -    process of unpacking or copying the files included in the Software
      -    Distribution to an Installation Target.
      -
      -    "Installation Target" refers to the target of an installation
      -    operation.  Targets are defined as follows:
      -
      -	1) "CPU" refers to a central processing unit which is able to
      -	store and/or execute the Software (a server, personal
      -    	computer, or other computer-like device) using at most two (2)
      -    	processors,
      -
      -	2) "Site" refers to at most one hundred fifty (150) CPUs
      -    	installed at a single site of a company,
      -
      -	3) "Corporate" refers to at most one thousand (1000) CPUs
      -	installed at an unlimited number of sites of the company,
      -
      -	4) "Developer CPU" refers to a single CPU used by at most one (1)
      -    	developer.
      -
      -    When installing the Software on a server CPU for use by other CPUs
      -    in a network, Licensee must obtain a License for the server CPU
      -    and for all client CPUs attached to the network which will make
      -    use of the Software by copying the Software in binary or source
      -    form from the server into their CPU memory. If a CPU makes use of
      -    more than two (2) processors, Licensee must obtain additional CPU
      -    licenses to cover the total number of installed
      -    processors. Likewise, if a Developer CPU is used by more than one
      -    developer, Licensee must obtain additional Developer CPU licenses
      -    to cover the total number of developers using the CPU.
      -
      -    "Commercial Environment" refers to any application environment
      -    which is aimed at directly or indirectly generating profit. This
      -    includes, without limitation, for-profit organizations,
      -    governments, private educational institutions, work as independent
      -    contractor, consultant and other profit generating relationships
      -    with organizations or individuals.
      -
      -    "Non-Commercial Environments" are all those application
      -    environments which do not directly or indirectly generate profit.
      -    Public educational institutions and officially acknowledged
      -    non-profit organizations are regarded as being a Non-Commercial
      -    Environments in the aforementioned sense.
      -
      -3.  License Grant
      -
      -    Subject to the terms and conditions of this License Agreement,
      -    eGenix.com hereby grants Licensee a non-exclusive, world-wide
      -    license to
      -
      -	1) use the Software to the extent of authorizations Licensee has
      -	acquired and
      -
      -	2) distribute, make and install copies to support the level of use
      -	authorized, providing Licensee reproduces this License Agreement
      -	and any other legends of ownership on each copy, or partial copy,
      -	of the Software.
      -
      -    If Licensee acquires this Software as a program upgrade,
      -    Licensee's authorization to use the Software from which Licensee
      -    upgraded is terminated.
      -
      -    Licensee will ensure that anyone who uses the Software does so
      -    only in compliance with the terms of this License Agreement.
      -
      -    Licensee may not 
      -
      -	1) use, copy, install, compile, modify, or distribute the
      -    	Software except as provided in this License Agreement;
      -
      -	2) reverse assemble, reverse engineer, reverse compile, or
      -	otherwise translate the Software except as specifically
      -    	permitted by law without the possibility of contractual
      -    	waiver; or
      -
      -	3) rent, sublicense or lease the Software.
      -
      -4.  Authorizations
      -
      -    The extent of authorization depends on the ownership of a Proof of
      -    Authorization for the Software.
      -
      -    Usage of the Software for any other purpose not explicitly covered
      -    by this License Agreement or granted by the Proof of Authorization
      -    is not permitted and requires the written prior permission from
      -    eGenix.com.
      -
      -5.  Modifications
      -
      -    Software modifications may only be distributed in form of patches
      -    to the original files contained in the Software Distribution.
      -
      -    The patches must be accompanied by a legend of origin and
      -    ownership and a visible message stating that the patches are not
      -    original Software delivered by eGenix.com, nor that eGenix.com can
      -    be held liable for possible damages related directly or indirectly
      -    to the patches if they are applied to the Software.
      -
      -6.  Experimental Code or Features
      -
      -    The Software may include components containing experimental code
      -    or features which may be modified substantially before becoming
      -    generally available.
      -
      -    These experimental components or features may not be at the level
      -    of performance or compatibility of generally available eGenix.com
      -    products. eGenix.com does not guarantee that any of the
      -    experimental components or features contained in the eGenix.com
      -    will ever be made generally available.
      -
      -7.  Expiration and License Control Devices
      -
      -    Components of the Software may contain disabling or license
      -    control devices that will prevent them from being used after the
      -    expiration of a period of time or on Installation Targets for
      -    which no license was obtained.
      -
      -    Licensee will not tamper with these disabling devices or the
      -    components. Licensee will take precautions to avoid any loss of
      -    data that might result when the components can no longer be used.
      -
      -8.  NO WARRANTY
      -
      -    eGenix.com is making the Software available to Licensee on an "AS
      -    IS" basis. SUBJECT TO ANY STATUTORY WARRANTIES WHICH CAN NOT BE
      -    EXCLUDED, EGENIX.COM MAKES NO REPRESENTATIONS OR WARRANTIES,
      -    EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION,
      -    EGENIX.COM MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY
      -    OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT
      -    THE USE OF THE SOFTWARE WILL NOT INFRINGE ANY THIRD PARTY RIGHTS.
      -
      -9.  LIMITATION OF LIABILITY
      -
      -    TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, IN NO EVENT
      -    SHALL EGENIX.COM BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE
      -    SOFTWARE FOR (I) ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES
      -    OR LOSS (INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF
      -    BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS
      -    INFORMATION, OR OTHER PECUNIARY LOSS) AS A RESULT OF USING,
      -    MODIFYING OR DISTRIBUTING THE SOFTWARE, OR ANY DERIVATIVE THEREOF,
      -    EVEN IF ADVISED OF THE POSSIBILITY THEREOF; OR (II) ANY AMOUNTS IN
      -    EXCESS OF THE AGGREGATE AMOUNTS PAID TO EGENIX.COM UNDER THIS
      -    LICENSE AGREEMENT DURING THE TWELVE (12) MONTH PERIOD PRECEDING
      -    THE DATE THE CAUSE OF ACTION AROSE.
      -
      -    SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION OR LIMITATION OF
      -    INCIDENTAL OR CONSEQUENTIAL DAMAGES, SO THE ABOVE EXCLUSION OR
      -    LIMITATION MAY NOT APPLY TO LICENSEE.
      -
      -10. Termination
      -
      -    This License Agreement will automatically terminate upon a
      -    material breach of its terms and conditions if not cured within
      -    thirty (30) days of written notice by eGenix.com. Upon
      -    termination, Licensee shall discontinue use and remove all
      -    installed copies of the Software.
      -
      -11. Indemnification 
      -
      -    Licensee hereby agrees to indemnify eGenix.com against and hold
      -    harmless eGenix.com from any claims, lawsuits or other losses that
      -    arise out of Licensee's breach of any provision of this License
      -    Agreement.
      -
      -12. Third Party Rights 
      -
      -    Any software or documentation in source or binary form provided
      -    along with the Software that is associated with a separate license
      -    agreement is licensed to Licensee under the terms of that license
      -    agreement. This License Agreement does not apply to those portions
      -    of the Software. Copies of the third party licenses are included
      -    in the Software Distribution.
      -
      -13. High Risk Activities 
      -
      -    The Software is not fault-tolerant and is not designed,
      -    manufactured or intended for use or resale as on-line control
      -    equipment in hazardous environments requiring fail-safe
      -    performance, such as in the operation of nuclear facilities,
      -    aircraft navigation or communication systems, air traffic control,
      -    direct life support machines, or weapons systems, in which the
      -    failure of the Software, or any software, tool, process, or
      -    service that was developed using the Software, could lead directly
      -    to death, personal injury, or severe physical or environmental
      -    damage ("High Risk Activities").
      -
      -    Accordingly, eGenix.com specifically disclaims any express or
      -    implied warranty of fitness for High Risk Activities.
      -
      -    Licensee agrees that eGenix.com will not be liable for any claims
      -    or damages arising from the use of the Software, or any software,
      -    tool, process, or service that was developed using the Software,
      -    in such applications.
      -
      -14. General
      -
      -    Nothing in this License Agreement affects any statutory rights of
      -    consumers that cannot be waived or limited by contract.
      -
      -    Nothing in this License Agreement shall be deemed to create any
      -    relationship of agency, partnership, or joint venture between
      -    eGenix.com and Licensee.
      -
      -    If any provision of this License Agreement shall be unlawful,
      -    void, or for any reason unenforceable, such provision shall be
      -    modified to the extent necessary to render it enforceable without
      -    losing its intent, or, if no such modification is possible, be
      -    severed from this License Agreement and shall not affect the
      -    validity and enforceability of the remaining provisions of this
      -    License Agreement.
      -
      -    This License Agreement shall be governed by and interpreted in all
      -    respects by the law of Germany, excluding conflict of law
      -    provisions. It shall not be governed by the United Nations
      -    Convention on Contracts for International Sale of Goods.
      -
      -    This License Agreement does not grant permission to use eGenix.com
      -    trademarks or trade names in a trademark sense to endorse or
      -    promote products or services of Licensee, or any third party.
      -
      -    The controlling language of this License Agreement is English. If
      -    Licensee has received a translation into another language, it has
      -    been provided for Licensee's convenience only.
      -
      -15. Agreement
      -
      -    By downloading, copying, installing or otherwise using the
      -    Software, Licensee agrees to be bound by the terms and conditions
      -    of this License Agreement.
      -
      -
      -    For questions regarding this License Agreement, please write to:
      -
      -	      eGenix.com Software, Skills and Services GmbH
      -	      Pastor-Loeh-Str. 48
      -	      D-40764 Langenfeld
      -	      Germany
      -		    
      -
      -
      -    If you have questions, please send e-mail to licenses@egenix.com
      -    or use the above postal address.

      eGenix.com Proof of Authorization (1 CPU License)

      -    This is the "Proof of Authorization" we send out for a "1 CPU
      -    License" subject to the above license. It permits you to install
      -    and use the Software on one machine having at most 2 processors.
      -
      -________________________________________________________________________
      -
      -EGENIX.COM PROOF OF AUTHORIZATION:                         1 CPU License
      -________________________________________________________________________
      -
      -1.  License Grant
      -
      -    eGenix.com Software, Skills and Services GmbH ("eGenix.com"),
      -    having an office at Pastor-Loeh-Str. 48, D-40764 Langenfeld,
      -    Germany, hereby grants the Individual or Organization ("Licensee")
      -
      -       Licensee:  xxxxxx
      -
      -    a non-exclusive, world-wide license to use the software listed
      -    below in source or binary form and its associated documentation
      -    ("the Software") under the terms and conditions of this License
      -    Agreement and to the extent authorized by this Proof of
      -    Authorization.
      -
      -2.  Covered Software
      -
      -       Software Name:              mxODBC Python ODBC Interface
      -       Software Version:           2.1
      -				   (including all patch level releases)
      -       Software Distribution:      As officially made available by 
      -				   eGenix.com on http://www.egenix.com/
      -       Operating System:           any compatible operating system
      -
      -3.  Authorizations
      -
      -    eGenix.com hereby authorizes Licensee to copy, install, compile,
      -    modify and use the Software on the following Installation Targets
      -    under the terms of this License Agreement.
      -
      -       Installation Targets:       one (1) CPU
      -
      -    Use of the Software for any other purpose or redistribution IS NOT
      -    PERMITTED BY THIS PROOF OF AUTHORIZATION.
      -
      -4.  Proof
      -
      -    This Proof of Authorization was issued by
      -
      -	Marc-Andre Lemburg, CEO eGenix.com
      -	Langenfeld, xxxx-xx-xx
      -
      -	Proof of Authorization Key:
      -	xxxx-xxxx-xxxx-xxxx-xxxx-xxxx
      -		    
      -
      -
      -    When you buy CPU licenses, you will receive a digitally signed
      -    "Proof of Authorization" by e-mail.
      -
      -    The PGP key used to sign these proofs is named "eGenix.com
      -    Licenses <licenses@egenix.com>" and can be fetched from any PGP
      -    key server, e.g. the OpenPGP Public Key Server. The PGP key ID is
      -    8C25C2A2; its fingerprint is "2E1B D691 A231 E09B CEF5 C9D5 C792
      -    13DD 8C25 C2A2". To check the digital signature, use one of the
      -    PGP or GPG programs available on the Internet.

      eGenix.com Proof of Authorization (1 Developer CPU License)

      -    This is the "Proof of Authorization" we send out for a "1
      -    Developer CPU License" subject to the above license. It allows
      -    you to redistribute the Software developed on the developer
      -    machine under certain conditions and is targeted at product
      -    developers wanting to use the Software in their products.
      -
      -    Please contact sales@eGenix.com if you have questions about the
      -    redistribution conditions or other requirements.
      -
      -________________________________________________________________________
      -
      -EGENIX.COM PROOF OF AUTHORIZATION:               1 Developer CPU License
      -________________________________________________________________________
      -
      -1.  License Grant
      -
      -    eGenix.com Software, Skills and Services GmbH ("eGenix.com"),
      -    having an office at Pastor-Loeh-Str. 48, D-40764 Langenfeld,
      -    Germany, hereby grants the Individual or Organization ("Licensee")
      -
      -       Licensee:  xxxxxx
      -
      -    a non-exclusive, world-wide license to use the software listed
      -    below in source or binary form and its associated documentation
      -    ("the Software") under the terms and conditions of this License
      -    Agreement and to the extent authorized by this Proof of
      -    Authorization.
      -
      -2.  Covered Software
      -
      -       Software Name:              mxODBC Python ODBC Interface
      -       Software Version:           2.1
      -				   (including all patch level releases)
      -       Software Distribution:      As officially made available by 
      -				   eGenix.com on http://www.egenix.com/
      -       Operating System:           any compatible operating system
      -
      -3.  Authorizations
      -
      -3.1. Application Development
      -
      -    eGenix.com hereby authorizes Licensee to copy, install, compile,
      -    modify and use the Software on the following Developer
      -    Installation Targets for the purpose of developing products using
      -    the Software as integral part.
      -
      -       Developer Installation Targets: one (1) CPU
      -
      -3.2. Redistribution
      -
      -    eGenix.com hereby authorizes Licensee to redistribute the Software
      -    bundled with a product developed by Licensee on the Developer
      -    Installation Targets ("the Product") subject to the terms and
      -    conditions of this License Agreement for installation and use in
      -    combination with the Product on the following Redistribution
      -    Installation Targets, provided that:
      -
      -        1) Licensee shall not and shall not permit or assist any third
      -        party to sell or distribute the Software as a separate
      -        product;
      -
      -        2) Licensee shall not and shall not permit any third party to
      -
      -           (i) market, sell or distribute the Software to any end user
      -           except subject to the eGenix Commercial License Agreement,
      -
      -           (ii) rent, sell, lease or otherwise transfer the Software
      -           or any part thereof or use it for the benefit of any third
      -           party,
      -
      -           (iii) use the Software outside the Product or for any other
      -           purpose not expressly licensed hereunder;
      -
      -        3) the Product does not provide functions or capabilities
      -        similar to those of the Software itself, i.e. the Product does
      -        not introduce commercial competition for the Software as sold
      -        by eGenix.com.
      -
      -       Redistribution Installation Targets: any number of CPUs capable of
      -                                            running the Product and the
      -                                            Software
      -
      -4.  Proof
      -
      -    This Proof of Authorization was issued by
      -
      -	Marc-Andre Lemburg, CEO eGenix.com
      -	Langenfeld, xxxx-xx-xx
      -
      -	Proof of Authorization Key:
      -	xxxx-xxxx-xxxx-xxxx-xxxx-xxxx
      -		    
      -
      -
      -    When you buy Developer CPU licenses, you will receive a digitally
      -    signed "Proof of Authorization" by e-mail.
      -
      -    The PGP key used to sign these proofs is named "eGenix.com
      -    Licenses <licenses@egenix.com>" and can be fetched from any PGP
      -    key server, e.g. the OpenPGP Public Key Server. The PGP key ID is
      -    8C25C2A2; its fingerprint is "2E1B D691 A231 E09B CEF5 C9D5 C792
      -    13DD 8C25 C2A2". To check the digital signature, use one of the
      -    PGP or GPG programs available on the Internet.
      -
      -    If you have questions, please send e-mail to licenses@egenix.com
      -    or use the above postal address.

      eGenix.com Proof of Authorization (Non-Commercial-Use 1 CPU License)

      -    This is the "Proof of Authorization" we send out for a
      -    "Non-Commercial-Use 1 CPU License" subject to the above license
      -    agreement. It permits you to install and use the Software on one
      -    machine having at most 2 processors in a Non-Commercial
      -    Environment as defined in the license agreement.
      -
      -    Please contact licenses@eGenix.com if you have questions about
      -    the term "Non-Commercial Environment" and whether this license
      -    covers your needs or not.
      -
      -________________________________________________________________________
      -
      -EGENIX.COM PROOF OF AUTHORIZATION:      Non-Commercial-Use 1 CPU License
      -________________________________________________________________________
      -
      -1.  License Grant
      -
      -    eGenix.com Software, Skills and Services GmbH ("eGenix.com"),
      -    having an office at Pastor-Loeh-Str. 48, D-40764 Langenfeld,
      -    Germany, hereby grants the Individual or Organization ("Licensee")
      -
      -       Licensee:  xxxxxx
      -
      -    a non-exclusive, world-wide license to use the software listed
      -    below in source or binary form and its associated documentation
      -    ("the Software") under the terms and conditions of this License
      -    Agreement and to the extent authorized by this Proof of
      -    Authorization.
      -
      -2.  Covered Software
      -
      -       Software Name:              mxODBC Python ODBC Interface
      -       Software Version:           2.1
      -				   (including all patch level releases)
      -       Software Distribution:      As officially made available by 
      -				   eGenix.com on http://www.egenix.com/
      -       Operating System:           any compatible operating system
      -
      -3.  Authorizations
      -
      -    eGenix.com hereby authorizes Licensee to copy, install, compile,
      -    modify and use the Software on the following Installation Targets
      -    under the terms of this License Agreement IN NON-COMMERCIAL
      -    ENVIRONMENTS ONLY.
      -
      -       Installation Targets:       one (1) CPU
      -
      -    Use of the Software in a Commercial Environment or for any other
      -    purpose or redistribution IS NOT PERMITTED BY THIS PROOF OF
      -    AUTHORIZATION.
      -
      -4.  Proof
      -
      -    This Proof of Authorization was issued by
      -
      -	Marc-Andre Lemburg, CEO eGenix.com
      -	Langenfeld, xxxx-xx-xx
      -
      -	Proof of Authorization Key:
      -	xxxx-xxxx-xxxx-xxxx-xxxx-xxxx
      -		    
      -
      -
      -    When you request Non-Commercial-Use CPU licenses, you will
      -    receive a digitally signed "Proof of Authorization" by e-mail.
      -
      -    The PGP key used to sign these proofs is named "eGenix.com
      -    Licenses <licenses@egenix.com>" and can be fetched from any PGP
      -    key server, e.g. the OpenPGP Public Key Server. The PGP key ID is
      -    8C25C2A2; its fingerprint is "2E1B D691 A231 E09B CEF5 C9D5 C792
      -    13DD 8C25 C2A2". To check the digital signature, use one of the
      -    PGP or GPG programs available on the Internet.
    -
    © 2000-2003, Copyright by eGenix.com Software GmbH, Langenfeld,
    Germany; All Rights Reserved. mailto: info@egenix.com
diff -Nru simpleparse-2.1.0a1/stt/Doc/mxTextTools.html simpleparse-2.2.0/stt/Doc/mxTextTools.html
--- simpleparse-2.1.0a1/stt/Doc/mxTextTools.html	2006-02-18 23:33:31.000000000 +0000
+++ simpleparse-2.2.0/stt/Doc/mxTextTools.html	1970-01-01 00:00:00.000000000 +0000
@@ -1,2677 +0,0 @@
-TextTools - Fast Text Manipulation Tools for Python
    - -

    mxTextTools - Fast Text Manipulation Tools for Python

    -
    -    Version 2.1.0
    -
    - -

    Introduction

    - -
      - -

      -    mxTextTools is a collection of high-speed string manipulation
      -    routines and new Python objects for dealing with common text
      -    processing tasks.
      -
      -    One of the major features of this package is the integrated
      -    Tagging Engine which allows accessing the speed of compiled C
      -    programs while maintaining the portability of Python. The Tagging
      -    Engine uses byte code "programs" written in form of Python
      -    tuples. These programs are then translated into an internal
      -    binary form which gets processed by a very fast virtual machine
      -    designed specifically for scanning text data.
      -
      -    As a result, the Tagging Engine allows parsing text at higher
      -    speeds than e.g. regular expression packages while still
      -    maintaining the flexibility of programming the parser in Python.
      -    Callbacks and user-defined matching functions extend this
      -    approach far beyond what you could do with other common text
      -    processing methods.
      -
      -    Two other major features are the search and character set objects
      -    provided by the package. Both are implemented in C to give you
      -    maximum performance on all supported platforms.
      -
      -    A note about the word 'tagging': this originated from what is
      -    done in HTML to mark some text with certain extra information.
      -    The Tagging Engine extends this notion to assigning Python
      -    objects to text substrings. Every substring marked in this way
      -    carries a 'tag' (the object) which can be used to do all kinds of
      -    useful things.
      -
      -    If you are looking for more tutorial style documentation of
      -    mxTextTools, there's a book by David Mertz, Text Processing with
      -    Python, which covers mxTextTools and other text oriented tools at
      -    great length.

    - - - -

    Tagging Engine

    - -
    - - - -

    TextSearch Object

    - -
      - -

      -    The TextSearch object is immutable and usable for one search
      -    string per object only. However, once created, TextSearch objects
      -    can be applied to as many text strings as you like -- much like
      -    compiled regular expressions. Matching is exact (doing
      -    translations on-the-fly if supported by the search algorithm).
      -
      -    Furthermore, TextSearch objects can be pickled and implement the
      -    copy protocol as defined by the copy module. Comparisons and
      -    hashing are not implemented (the objects are stored by id in
      -    dictionaries).
      -
      -    Depending on the search algorithm, TextSearch objects can search
      -    in 8-bit strings and/or Unicode. Searching in memory buffers is
      -    currently not supported. Accordingly, the search string itself
      -    may also be an 8-bit string or Unicode.

      TextSearch Object Constructors

      - -
        - -

        -    In older versions of mxTextTools there were two separate
        -    constructors for search objects: BMS() for Boyer-Moore and
        -    FS() for the (unpublished) FastSearch algorithm. With 2.1.0
        -    the interface was changed to merge these two constructors into
        -    one having the algorithm type as parameter.
        -
        -    Note: The FastSearch algorithm is *not* included in the public
        -    release of mxTextTools.

        -

        -
        -    TextSearch(match, translate=None, algorithm=default_algorithm)
        -
        -        Create a TextSearch substring search object for the string
        -        match implementing the algorithm specified in the
        -        constructor.
        -
        -        algorithm defines the algorithm to use. Possible values
        -        are:
        -
        -        BOYERMOORE
        -            Enhanced Boyer-Moore-Horspool style algorithm for
        -            searching in 8-bit text. Unicode is not supported.
        -            On-the-fly translation is supported.
        -
        -        FASTSEARCH
        -            Enhanced Boyer-Moore style algorithm for searching in
        -            8-bit text. This algorithm provides better performance
        -            for match patterns having repeating sequences, like
        -            e.g. DNA strings. Unicode is not supported. On-the-fly
        -            translation is supported.
        -
        -            Not included in the public release of mxTextTools.
        -
        -        TRIVIAL
        -            Trivial right-to-left search algorithm. This algorithm
        -            can be used to search in 8-bit text and Unicode.
        -            On-the-fly translation is not supported.
        -
        -        algorithm defaults to BOYERMOORE (or FASTSEARCH if
        -        available) for 8-bit match strings and TRIVIAL for Unicode
        -        match strings.
        -
        -        translate is an optional translate-string like the one used
        -        in the module 're', i.e. a 256 character string mapping the
        -        ordinals of the base character set to new characters. It is
        -        supported by the BOYERMOORE and FASTSEARCH algorithms only.
        -
        -        This function supports keyword arguments.

        -    BMS(match[, translate])
        -
        -        DEPRECATED: Use TextSearch(match, translate, BOYERMOORE)
        -        instead.
        -
        -    FS(match[, translate])
        -
        -        DEPRECATED: Use TextSearch(match, translate, FASTSEARCH)
        -        instead.

        -
      - -

      TextSearch Object Instance Variables

      - -
        -

        -    To provide some help for reflection and pickling, the
        -    TextSearch object gives (read-only) access to these attributes:
        -
        -    match
        -        The string that the search object will look for in the
        -        search text.
        -
        -    translate
        -        The translate string used by the object or None (if no
        -        translate string was passed to the constructor).
        -
        -    algorithm
        -        The algorithm used by the TextSearch object. For possible
        -        values, see the TextSearch() constructor documentation.

        - -
        - -
      - -

      TextSearch Object Instance Methods

      - -
        -

        -    The TextSearch object has the following methods:
        -
        -    search(text[, start=0, stop=len(text)])
        -        Search for the substring match in text, looking only at the
        -        slice [start:stop] and return the slice (l,r) where the
        -        substring was found, or (start,start) if it was not found.
        -
        -    find(text[, start=0, stop=len(text)])
        -        Search for the substring match in text, looking only at the
        -        slice [start:stop] and return the index where the substring
        -        was found, or -1 if it was not found. This interface is
        -        compatible with string.find.
        -
        -    findall(text[, start=0, stop=len(text)])
        -        Same as search(), but return a list of all non-overlapping
        -        slices (l,r) where the match string can be found in text.
        -
        -    Note that translating the text before doing the search often
        -    results in better performance. Use string.translate() to do
        -    that efficiently.
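
        As a quick illustration of the constructor and methods described
        above, here is a minimal usage sketch (the text, search string and
        variable names are made up for this example; it assumes the bundled
        TextTools package re-exports TextSearch and the BOYERMOORE constant
        listed in the Constants section below):

            from simpleparse.stt.TextTools import TextSearch, BOYERMOORE

            text = "spam, spam, eggs and spam"
            needle = TextSearch("spam", None, BOYERMOORE)  # 8-bit search object

            print(needle.find(text))       # 0       -- index of the first hit, -1 if none
            print(needle.search(text, 1))  # (6, 10) -- slice of the next hit
            print(needle.findall(text))    # [(0, 4), (6, 10), (21, 25)]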

      -
    - -
    - -

    CharSet Object

    - -
      - -

      -    The CharSet object is an immutable object which can be used for
      -    character set based string operations like text matching,
      -    searching, splitting etc.
      -
      -    CharSet objects can be pickled and implement the copy protocol as
      -    defined by the copy module as well as the 'in'-protocol, so that
      -    c in charset works as expected. Comparisons and hashing are not
      -    implemented (the objects are stored by id in dictionaries).
      -
      -    The objects support both 8-bit strings and UCS-2 Unicode in both
      -    the character set definition and the various methods. Mixing of
      -    the supported types is also allowed. Memory buffers are currently
      -    not supported.

      CharSet Object Constructor

      - -
        - -

        -

        -
        -    CharSet(definition)
        -
        -        Create a CharSet object for the given character set
        -        definition.
        -
        -        definition may be an 8-bit string or Unicode.
        -
        -        The constructor supports the re-module syntax for defining
        -        character sets: "a-e" maps to "abcde" (the backslash can be
        -        used to escape the special meaning of "-", e.g. r"a\-e"
        -        maps to "a-e") and "^a-e" maps to the set containing all
        -        but the characters "abcde".
        -
        -        Note that the special meaning of "^" only applies if it
        -        appears as the first character in a CharSet definition. If
        -        you want to create a CharSet with the single character "^",
        -        then you'll have to use the escaped form: r"\^". The
        -        non-escaped form "^" would result in a CharSet matching all
        -        characters.
        -
        -        To add the backslash character to a CharSet you have to
        -        escape it with itself: r"\\".
        -
        -        Watch out for the Python quoting semantics in these
        -        explanations: the small r in front of some of these strings
        -        makes them raw Python string literals, which means that no
        -        interpretation of backslashes is applied: r"\\" == "\\\\"
        -        and r"a\-e" == "a\\-e".
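
        To make the definition syntax above concrete, here is a small sketch
        (the variable names are invented for the example; it assumes the
        bundled TextTools package exposes CharSet as described here):

            from simpleparse.stt.TextTools import CharSet

            vowels    = CharSet('aeiou')    # explicit enumeration
            lower     = CharSet('a-z')      # re-style range
            not_digit = CharSet('^0-9')     # complement: everything but 0-9
            a_dash_e  = CharSet(r'a\-e')    # the three characters 'a', '-', 'e'

            print('e' in vowels)            # 'in'-protocol, see above -> true value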

        - -

        -
      - -

      CharSet Object Instance Variables

      - -
        -

        -    To provide some help for reflection and pickling, the CharSet
        -    object gives (read-only) access to these attributes:
        -
        -    definition
        -        The definition string which was passed to the constructor.

        - -
        - -
      - -

      CharSet Object Instance Methods

      - -
        -

        -    The CharSet object has these methods:
        -
        -    contains(char)
        -        Return 1 if char is included in the character set, 0
        -        otherwise.
        -
        -    search(text[, direction=1, start=0, stop=len(text)])
        -        Search text[start:stop] for the first character included in
        -        the character set. Returns the index position of the found
        -        character, or None if no such character is found.
        -
        -        direction defines the search direction: a positive value
        -        searches forward starting from text[start], while a
        -        negative value searches backwards from text[stop-1].
        -
        -    match(text[, direction=1, start=0, stop=len(text)])
        -        Look for the longest match of characters in text[start:stop]
        -        which appear in the character set. Returns the length of
        -        this match as integer.
        -
        -        direction defines the match direction: a positive value
        -        searches forward starting from text[start] giving a prefix
        -        match, while a negative value searches backwards from
        -        text[stop-1] giving a suffix match.
        -
        -    split(text[, start=0, stop=len(text)])
        -        Split text[start:stop] into a list of substrings using the
        -        character set definition, omitting the splitting parts and
        -        empty substrings.
        -
        -    splitx(text[, start=0, stop=len(text)])
        -        Split text[start:stop] into a list of substrings using the
        -        character set definition, such that every second entry
        -        consists only of characters in the set.
        -
        -    strip(text[, where=0, start=0, stop=len(text)])
        -        Strip all characters in text[start:stop] appearing in the
        -        character set.
        -
        -        where indicates where to strip (<0: left; =0: left and
        -        right; >0: right).
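
        A small usage sketch of these methods (the input string and variable
        names are invented for the example; the commented results are what
        the descriptions above should yield):

            from simpleparse.stt.TextTools import CharSet

            ws = CharSet(' \t\r\n')      # whitespace characters
            s = "  hello   world \n"

            print(ws.contains(' '))      # 1
            print(ws.search(s))          # 0 -- index of the first whitespace char
            print(ws.match(s))           # 2 -- length of the leading whitespace run
            print(ws.split(s))           # ['hello', 'world']
            print(ws.strip(s))           # 'hello   world'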

        - -

        - -
      -
    - -
    - -

    Functions

    - -
    - - - -

    Constants

    - -
      - -

      -    The package exports these constants. They are defined in
      -    Constants/Sets.
      -
      -    Note that Unicode defines many more characters in the following
      -    categories. The character sets defined here are restricted to
      -    ASCII (and parts of Latin-1) only.
      -
      -    a2z
      -        'abcdefghijklmnopqrstuvwxyz'
      -
      -    A2Z
      -        'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
      -
      -    umlaute
      -        'äöüß'
      -
      -    Umlaute
      -        'ÄÖÜ'
      -
      -    alpha
      -        A2Z + a2z
      -
      -    german_alpha
      -        A2Z + a2z + umlaute + Umlaute
      -
      -    number
      -        '0123456789'
      -
      -    alphanumeric
      -        alpha + number
      -
      -    white
      -        ' \t\v'
      -
      -    newline
      -        '\n\r'
      -
      -    formfeed
      -        '\f'
      -
      -    whitespace
      -        white + newline + formfeed
      -
      -    any
      -        All characters from \000-\377
      -
      -    *_charset
      -        All of the above as CharSet() objects.
      -
      -    *_set
      -        All of the above as set() compatible character sets.
      -
      -    tagtable_cache
      -        This is the cache dictionary used by the TagTable() compiler
      -        to store compiled Tag Table definitions. It has a hard limit
      -        of 100 entries, but can also be managed by user routines to
      -        lower this limit.
      -
      -    BOYERMOORE, FASTSEARCH, TRIVIAL
      -        TextSearch() algorithm values.
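
      These constants are plain strings (or CharSet/set objects for the
      *_charset/*_set variants), so they can be combined and tested like any
      other string; a tiny sketch based on the definitions above:

          from simpleparse.stt.TextTools import a2z, A2Z, alpha, number, whitespace

          print(number)              # '0123456789'
          print(alpha == A2Z + a2z)  # true, by the definition above
          print('x' in a2z)          # plain substring/membership test
          print(len(whitespace))     # white + newline + formfeed characters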

        - -

        -
      - -
    - -
    - -

    Examples of Use

    - -
      - -

      -    The Examples/ subdirectory of the package contains a few examples
      -    of how tables can be written and used. Here is a non-trivial
      -    example for parsing HTML (well, most of it):

      
      -    from simpleparse.stt.TextTools import *
      -
      -    error = '***syntax error'			# error tag obj
      -
      -    tagname_set = set(alpha+'-'+number)
      -    tagattrname_set = set(alpha+'-'+number)
      -    tagvalue_set = set('"\'> ',0)
      -    white_set = set(' \r\n\t')
      -
      -    tagattr = (
      -	   # name
      -	   ('name',AllInSet,tagattrname_set),
      -	   # with value ?
      -	   (None,Is,'=',MatchOk),
      -	   # skip junk
      -	   (None,AllInSet,white_set,+1),
      -	   # unquoted value
      -	   ('value',AllInSet,tagvalue_set,+1,MatchOk),
      -	   # double quoted value
      -	   (None,Is,'"',+5),
      -	     ('value',AllNotIn,'"',+1,+2),
      -	     ('value',Skip,0),
      -	     (None,Is,'"'),
      -	     (None,Jump,To,MatchOk),
      -	   # single quoted value
      -	   (None,Is,'\''),
      -	     ('value',AllNotIn,'\'',+1,+2),
      -	     ('value',Skip,0),
      -	     (None,Is,'\'')
      -	   )
      -
      -    valuetable = (
      -	# ignore whitespace + '='
      -	(None,AllInSet,set(' \r\n\t='),+1),
      -	# unquoted value
      -	('value',AllInSet,tagvalue_set,+1,MatchOk),
      -	# double quoted value
      -	(None,Is,'"',+5),
      -	 ('value',AllNotIn,'"',+1,+2),
      -	 ('value',Skip,0),
      -	 (None,Is,'"'),
      -	 (None,Jump,To,MatchOk),
      -	# single quoted value
      -	(None,Is,'\''),
      -	 ('value',AllNotIn,'\'',+1,+2),
      -	 ('value',Skip,0),
      -	 (None,Is,'\'')
      -	)
      -
      -    allattrs = (# look for attributes
      -	       (None,AllInSet,white_set,+4),
      -	        (None,Is,'>',+1,MatchOk),
      -	        ('tagattr',Table,tagattr),
      -	        (None,Jump,To,-3),
      -	       (None,Is,'>',+1,MatchOk),
      -	       # handle incorrect attributes
      -	       (error,AllNotIn,'> \r\n\t'),
      -	       (None,Jump,To,-6)
      -	       )
      -
      -    htmltag = ((None,Is,'<'),
      -	       # is this a closing tag ?
      -	       ('closetag',Is,'/',+1),
      -	       # a comment ?
      -	       ('comment',Is,'!',+8),
      -		(None,Word,'--',+4),
      -		('text',sWordStart,BMS('-->'),+1),
      -		(None,Skip,3),
      -		(None,Jump,To,MatchOk),
      -		# a SGML-Tag ?
      -		('other',AllNotIn,'>',+1),
      -		(None,Is,'>'),
      -		    (None,Jump,To,MatchOk),
      -		   # XMP-Tag ?
      -		   ('tagname',Word,'XMP',+5),
      -		    (None,Is,'>'),
      -		    ('text',WordStart,'</XMP>'),
      -		    (None,Skip,len('</XMP>')),
      -		    (None,Jump,To,MatchOk),
      -		   # get the tag name
      -		   ('tagname',AllInSet,tagname_set),
      -		   # look for attributes
      -		   (None,AllInSet,white_set,+4),
      -		    (None,Is,'>',+1,MatchOk),
      -		    ('tagattr',Table,tagattr),
      -		    (None,Jump,To,-3),
      -		   (None,Is,'>',+1,MatchOk),
      -		   # handle incorrect attributes
      -		   (error,AllNotIn,'> \n\r\t'),
      -		   (None,Jump,To,-6)
      -		  )
      -
      -    htmltable = (# HTML-Tag
      -		 ('htmltag',Table,htmltag,+1,+4),
      -		 # not HTML, but still using this syntax: error or inside XMP-tag !
      -		 (error,Is,'<',+3),
      -		  (error,AllNotIn,'>',+1),
      -		  (error,Is,'>'),
      -		 # normal text
      -		 ('text',AllNotIn,'<',+1),
      -		 # end of file
      -		 ('eof',EOF,Here,-5),
      -		)
      -      
      -	
      - -

      -    I hope this doesn't scare you away :-) ... it's fast as hell.
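
      To show how such a table is actually run, here is a much smaller,
      self-contained sketch using the tag() function (the table and input
      string are invented for the example; the commented results assume the
      usual Tag Table semantics: an entry is (tagobj, command, argument), a
      mismatch fails the table by default and a match continues with the
      next entry):

          from simpleparse.stt.TextTools import tag, AllIn, Is, a2z, number

          table = (
              ('key',   AllIn, a2z),      # run of lowercase letters, tagged 'key'
              (None,    Is,    '='),      # a literal '=', not reported (tagobj is None)
              ('value', AllIn, number),   # run of digits, tagged 'value'
          )

          success, taglist, next_pos = tag('answer=42', table)
          print(success)   # 1
          print(taglist)   # [('key', 0, 6, None), ('value', 7, 9, None)]
          print(next_pos)  # 9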

    - -
    - -

    Package Structure

    - -
      - -
      -[TextTools]
      -       [Constants]
      -              Sets.py
      -              TagTables.py
      -       Doc/
      -       [Examples]
      -              HTML.py
      -              Loop.py
      -              Python.py
      -              RTF.py
      -              RegExp.py
      -              Tim.py
      -              Words.py
      -              altRTF.py
      -              pytag.py
      -       [mxTextTools]
      -              test.py
      -       TextTools.py
      -    
      - -

      -    Entries enclosed in brackets are packages (i.e. they are
      -    directories that include an __init__.py file). Ones with slashes
      -    are just ordinary subdirectories that are not accessible via
      -    import.
      -
      -    The package TextTools imports everything needed from the other
      -    components. It is sometimes also handy to do a
      -    from simpleparse.stt.TextTools.Constants.TagTables import *.
      -
      -    Examples/ contains a few demos of what the Tag Tables can do.

      - -

    - -
    - -

    Optional Add-Ons for mxTextTools

    - -
      - -

      -    Mike C. Fletcher is working on a Tag Table generator called
      -    SimpleParse. It works as a parser-generating front end to the
      -    Tagging Engine and converts an EBNF style grammar into a Tag
      -    Table directly usable with the tag() function.
      -
      -    Tony J. Ibbs has started to work on a meta-language for
      -    mxTextTools. It aims at simplifying the task of writing Tag Table
      -    tuples using a Python style syntax. It also gets rid of the
      -    annoying jump offset calculations.
      -
      -    Andrew Dalke has started work on a parser generator called Martel
      -    built upon mxTextTools which takes a regular expression grammar
      -    for a format and turns the resultant parse tree into a set of
      -    callback events emulating the XML/SAX API. The results look very
      -    promising!
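
      Since this documentation ships inside SimpleParse itself, a short
      sketch of that front end may be helpful (the grammar and variable
      names below are invented for illustration; see the SimpleParse
      documentation for the authoritative API):

          from simpleparse.parser import Parser

          # EBNF-style grammar: one or more comma-separated integers
          declaration = r'''
ints := int, (',', int)*
int  := [0-9]+
'''

          parser = Parser(declaration, 'ints')
          success, children, next_pos = parser.parse('1,23,456')
          print(success)   # 1 on a successful parse
          print(children)  # list of (production, start, stop, children) tuples
          print(next_pos)  # index of the first character not consumed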

    - - - -

    Support

    - -
    - - - -

    Copyright & License

    - -
    - - - -

    History & Future

    - -
    - -

    -


    -
    -

    -    © 1997-2000, Copyright by Marc-André Lemburg; All Rights Reserved.
    -    mailto: mal@lemburg.com
    -
    -    © 2000-2001, Copyright by eGenix.com Software GmbH; All Rights
    -    Reserved. mailto: info@egenix.com

diff -Nru simpleparse-2.1.0a1/stt/__init__.py simpleparse-2.2.0/stt/__init__.py
--- simpleparse-2.1.0a1/stt/__init__.py	2006-02-19 00:48:51.000000000 +0000
+++ simpleparse-2.2.0/stt/__init__.py	1970-01-01 00:00:00.000000000 +0000
@@ -1,22 +0,0 @@
-"""SimpleParse' built-in version of the mxTextTools text-processing engine
-
-    Copyright (c) 1998-2000, Marc-Andre Lemburg; mailto:mal@lemburg.com
-    Copyright (c) 2000-2003, eGenix.com Software GmbH; mailto:info@egenix.com
-    Copyright (c) 2003-2006, Mike Fletcher; mailto:mcfletch@vrplumber.com
-
-    See the documentation for further information on copyrights,
-    or contact the author. All Rights Reserved.
-
-    IMPORTANT:
-    The subpackages included in the mx Extension series may have
-    different license terms imposed on them. Be sure to read the
-    documentation of each subpackage *before* using them.
-
-"""
-__copyright__ = """\
-Copyright (c) 1998-2000, Marc-Andre Lemburg; mailto:mal@lemburg.com
-Copyright (c) 2000-2003, eGenix.com Software GmbH; mailto:info@egenix.com
-Copyright (c) 2003-2006, Mike Fletcher; mailto:mcfletch@vrplumber.com
- See the documentation for further information on copyrights,
- or contact the author. All Rights Reserved.
-"""
diff -Nru simpleparse-2.1.0a1/stt/LICENSE simpleparse-2.2.0/stt/LICENSE
--- simpleparse-2.1.0a1/stt/LICENSE	2006-02-18 23:32:36.000000000 +0000
+++ simpleparse-2.2.0/stt/LICENSE	1970-01-01 00:00:00.000000000 +0000
@@ -1,16 +0,0 @@
-=============================================================================
-eGenix.com mx Extension Series for Python
------------------------------------------------------------------------------
-
-Please see the LICENSE file in the package subdirectories for information
-on copyright and authorized use of the packages.
-
-Some packages are licensed as Open Source software, others are free for
-educational use only. All packages come with full source code.
-
------------------------------------------------------------------------------
-Marc-Andre Lemburg, mal@egenix.com
-CEO
-eGenix.com GmbH
-Langenfeld
-Germany
diff -Nru simpleparse-2.1.0a1/stt/mxLicense.html simpleparse-2.2.0/stt/mxLicense.html
--- simpleparse-2.1.0a1/stt/mxLicense.html	2006-02-18 23:32:36.000000000 +0000
+++ simpleparse-2.2.0/stt/mxLicense.html	1970-01-01 00:00:00.000000000 +0000
@@ -1,691 +0,0 @@
-mx Extension Series - License Information
    -

    mx Extension Series - License Information

    - -
    - - - - -
    -    Version 1.0.0
    -
    - -

    Introduction

    - -
      - -

      -    The mx Extension Series packages are brought to you by the
      -    eGenix.com Software, Skills and Services GmbH, Langenfeld,
      -    Germany. We are licensing our products under the following two
      -    different licenses:
      -
      -    The Public License is very similar to the Python 2.0 license and
      -    covers the open source software made available by eGenix.com
      -    which is free of charge even for commercial use.
      -
      -    The Commercial License is intended for covering commercial
      -    eGenix.com software, notably the mxODBC package. Only private and
      -    non-commercial use is free of charge.
      -
      -    If you have questions regarding these licenses, please contact
      -    Licenses@eGenix.com. If you would like to bundle the software
      -    with your commercial product, please write to Sales@eGenix.com
      -    for more information about the redistribution conditions and
      -    terms.

    - - - -

    eGenix.com Public License

    - -
      - -

      -    The eGenix.com Public License is similar to the Python 2.0
      -    license and is considered an Open Source license (in the sense
      -    defined by the Open Source Initiative (OSI)) by eGenix.com.
      -
      -    The license should also be compatible with the GNU Public License
      -    in case that matters. The only part which is known to have caused
      -    some problems with Richard Stallman in the past is the choice of
      -    law clause.

      - - - - - -
      - -

      EGENIX.COM PUBLIC LICENSE AGREEMENT VERSION 1.0.0

      -

      -

      1. Introduction

      -

      -This "License Agreement" is between eGenix.com Software, Skills and -Services GmbH ("eGenix.com"), having an office at Pastor-Loeh-Str. 48, -D-40764 Langenfeld, Germany, and the Individual or Organization -("Licensee") accessing and otherwise using this software in source or -binary form and its associated documentation ("the Software"). -

      -

      2. License

      -

      -Subject to the terms and conditions of this eGenix.com Public License -Agreement, eGenix.com hereby grants Licensee a non-exclusive, -royalty-free, world-wide license to reproduce, analyze, test, perform -and/or display publicly, prepare derivative works, distribute, and -otherwise use the Software alone or in any derivative version, -provided, however, that the eGenix.com Public License Agreement is -retained in the Software, or in any derivative version of the Software -prepared by Licensee. -

      -

      3. NO WARRANTY

      -

      -eGenix.com is making the Software available to Licensee on an "AS IS" -basis. SUBJECT TO ANY STATUTORY WARRANTIES WHICH CAN NOT BE EXCLUDED, -EGENIX.COM MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. -BY WAY OF EXAMPLE, BUT NOT LIMITATION, EGENIX.COM MAKES NO AND -DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS -FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE WILL NOT -INFRINGE ANY THIRD PARTY RIGHTS. -

      -

      4. LIMITATION OF LIABILITY

      -

      -EGENIX.COM SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE -SOFTWARE FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS -(INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, -BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER -PECUNIARY LOSS) AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THE -SOFTWARE, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE -POSSIBILITY THEREOF. -

      -SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION OR LIMITATION OF -INCIDENTAL OR CONSEQUENTIAL DAMAGES, SO THE ABOVE EXCLUSION OR -LIMITATION MAY NOT APPLY TO LICENSEE. -

      -

      5. Termination

      -

      -This License Agreement will automatically terminate upon a material -breach of its terms and conditions. -

      -

      6. General

      -

      -Nothing in this License Agreement affects any statutory rights of -consumers that cannot be waived or limited by contract. -

      -Nothing in this License Agreement shall be deemed to create any -relationship of agency, partnership, or joint venture between -eGenix.com and Licensee. -

      -If any provision of this License Agreement shall be unlawful, void, or -for any reason unenforceable, such provision shall be modified to the -extent necessary to render it enforceable without losing its intent, -or, if no such modification is possible, be severed from this License -Agreement and shall not affect the validity and enforceability of the -remaining provisions of this License Agreement. -

      -This License Agreement shall be governed by and interpreted in all -respects by the law of Germany, excluding conflict of law -provisions. It shall not be governed by the United Nations Convention -on Contracts for International Sale of Goods. -

      -This License Agreement does not grant permission to use eGenix.com -trademarks or trade names in a trademark sense to endorse or promote -products or services of Licensee, or any third party. -

      -The controlling language of this License Agreement is English. If -Licensee has received a translation into another language, it has been -provided for Licensee's convenience only. -

      -

      7. Agreement

      -

      -By downloading, copying, installing or otherwise using the Software, -Licensee agrees to be bound by the terms and conditions of this -License Agreement. -

      - -

      - -

    - - - -

    eGenix.com Commercial License

    - -
      - -

      -    The eGenix.com Commercial License covers commercial eGenix.com
      -    software, notably the mxODBC package. Only private and
      -    non-commercial use is free of charge. Usage of the software in
      -    commercial settings such as for implementing in-house
      -    applications in/for companies or consulting work where the
      -    software is used as a tool requires a "Proof of Authorization"
      -    which can be bought from eGenix.com.

      - - - - - -
      - -

      EGENIX.COM COMMERCIAL LICENSE AGREEMENT VERSION 1.0.0

      -

      -

      1. Introduction

      -

      -This "License Agreement" is between eGenix.com Software, Skills and -Services GmbH ("eGenix.com"), having an office at Pastor-Loeh-Str. 48, -D-40764 Langenfeld, Germany, and the Individual or Organization -("Licensee") accessing and otherwise using this software in source or -binary form and its associated documentation ("the Software"). -

      -

      2. Terms and Definitions

      -

      -The "Software" covered under this License Agreement includes without -limitation, all object code, source code, help files, publications, -documentation and other programs, products or tools that are included -in the official "Software Distribution" available from eGenix.com. -

      -The "Proof of Authorization" for the Software is a written and signed -notice from eGenix.com providing evidence of the extent of -authorizations the Licensee has acquired to use the Software and of -Licensee's eligibility for future upgrade program prices (if -announced) and potential special or promotional opportunities. As -such, the Proof of Authorization becomes part of this License Agreement. -

      -Installation of the Software ("Installation") refers to the process of -unpacking or copying the files included in the Software Distribution -to an Installation Target. -

      -"Installation Target" refers to the target of an installation -operation. Targets are defined as follows: -

      - 1) "CPU" refers to a central processing unit which is able to - store and/or execute the Software (a server, personal computer, or - other computer-like device) using at most two (2) processors, -

      - 2) "Site" refers to at most one hundred fifty (150) CPUs installed - at a single site of a company, -

      - 3) "Corporate" refers to at most one thousand (1000) CPUs - installed at an unlimited number of sites of the company, -

      - 4) "Developer CPU" refers to a single CPU used by at most one (1) - developer. -

      -When installing the Software on a server CPU for use by other CPUs in -a network, Licensee must obtain a License for the server CPU and for -all client CPUs attached to the network which will make use of the -Software by copying the Software in binary or source form from the -server into their CPU memory. If a CPU makes use of more than two (2) -processors, Licensee must obtain additional CPU licenses to cover the -total number of installed processors. Likewise, if a Developer CPU is -used by more than one developer, Licensee must obtain additional -Developer CPU licenses to cover the total number of developers using -the CPU. -

      -"Commercial Environment" refers to any application environment which -is aimed at producing profit. This includes, without limitation, -for-profit organizations, work as independent contractor, consultant -and other profit generating relationships with organizations or -individuals. -

      -"Non-Commercial Environments" are all those application environments -which do not directly or indirectly generate profit. Educational and -other officially acknowledged non-profit organizations are regarded as -being a Non-Commercial Environment in the above sense. -

      -

      3. License Grant

      -

      -Subject to the terms and conditions of this License Agreement, -eGenix.com hereby grants Licensee a non-exclusive, world-wide license -to -

      - 1) use the Software to the extent of authorizations Licensee has - acquired and -

      - 2) distribute, make and install copies to support the level of use - authorized, providing Licensee reproduces this License Agreement - and any other legends of ownership on each copy, or partial copy, - of the Software. -

      -If Licensee acquires this Software as a program upgrade, Licensee's -authorization to use the Software from which Licensee upgraded is -terminated. -

      -Licensee will ensure that anyone who uses the Software does so only in -compliance with the terms of this License Agreement. -

      -Licensee may not -

      - 1) use, copy, install, compile, modify, or distribute the Software - except as provided in this License Agreement; -

      - 2) reverse assemble, reverse engineer, reverse compile, or - otherwise translate the Software except as specifically permitted - by law without the possibility of contractual waiver; or -

      - 3) rent, sublicense or lease the Software. -

      -

      4. Authorizations

      -

      -The extent of authorization depends on the ownership of a Proof of -Authorization for the Software. -

      -Usage of the Software for any other purpose not explicitly covered by -this License Agreement or granted by the Proof of Authorization is not -permitted and requires the written prior permission from eGenix.com. -

      -

      4.1. Non-Commercial Environments

      -

      -This section applies to all uses of the Software without a Proof of -Authorization for the Software in a Non-Commercial Environment. -

      -Licensee may copy, install, compile, modify and use the Software under -the terms of this License Agreement FOR NON-COMMERCIAL PURPOSES ONLY. -

      -Use of the Software in a Commercial Environment or for any other -purpose, such as redistribution, IS NOT PERMITTED BY THIS LICENSE and -requires a Proof of Authorization from eGenix.com. -

      -

      4.2. Evaluation Period for Commercial Environments

      -

      -This section applies to all uses of the Software without a Proof of -Authorization for the Software in a Commercial Environment. -

      -Licensee may copy, install, compile, modify and use the Software under -the terms of this License Agreement FOR EVALUATION AND TESTING -PURPOSES and DURING A LIMITED EVALUATION PERIOD OF AT MOST THIRTY (30) -DAYS AFTER INITIAL INSTALLATION ONLY. -

      -For use of the Software after the evaluation period or for any other -purpose, such as redistribution, Licensee must obtain a Proof of -Authorization from eGenix.com. -

      -If Licensee decides not to obtain a Proof of Authorization after the -evaluation period, Licensee agrees to cease using and to remove all -installed copies of the Software. -

      -

      4.3. Usage under Proof of Authorization

      -

      -This section applies to all uses of the Software provided that -Licensee owns a Proof of Authorization for the Software. -

      -Licensee may copy, install, compile, modify, use and distribute the
      -Software to the extent of authorization acquired by the Proof of
      -Authorization and under the terms and conditions of this License
      -Agreement.

      -

      5. Transfer of Rights and Obligations

      -

      -Licensee may transfer all license rights and obligations under a Proof -of Authorization for the Software to another party by transferring the -Proof of Authorization and a copy of this License Agreement and all -documentation. -

      -The transfer of Licensee's license rights and obligations terminates -Licensee's authorization to use the Software under the Proof of -Authorization. -

      -

      6. Modifications

      -

      -Software modifications may only be distributed in form of patches to -the original files contained in the Software Distribution. -

      -The patches must be accompanied by a legend of origin and ownership
      -and a visible message stating that the patches are not original
      -Software delivered by eGenix.com, and that eGenix.com cannot be held
      -liable for possible damages related directly or indirectly to the
      -patches if they are applied to the Software.

      -

      7. Experimental Code or Features

      -

      -The Software may include components containing experimental code or -features which may be modified substantially before becoming generally -available. -

      -These experimental components or features may not be at the level of
      -performance or compatibility of generally available eGenix.com
      -products. eGenix.com does not guarantee that any of the experimental
      -components or features contained in the eGenix.com Software will ever
      -be made generally available.

      -

      8. Expiration and License Control Devices

      -

      -Components of the Software may contain disabling or license control -devices that will prevent them from being used after the expiration of -a period of time or on Installation Targets for which no license was -obtained. -

      -Licensee will not tamper with these disabling devices or the -components. Licensee will take precautions to avoid any loss of data -that might result when the components can no longer be used. -

      -

      9. NO WARRANTY

      -

      -eGenix.com is making the Software available to Licensee on an "AS IS" -basis. SUBJECT TO ANY STATUTORY WARRANTIES WHICH CAN NOT BE EXCLUDED, -EGENIX.COM MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR -IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, EGENIX.COM MAKES NO -AND DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR -FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE -WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. -

      -

      10. LIMITATION OF LIABILITY

      -

      -TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, IN NO EVENT SHALL
      -EGENIX.COM BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE SOFTWARE
      -FOR (I) ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS
      -(INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
      -BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER
      -PECUNIARY LOSS) AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THE
      -SOFTWARE, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE
      -POSSIBILITY THEREOF; OR (II) ANY AMOUNTS IN EXCESS OF THE AGGREGATE
      -AMOUNTS PAID TO EGENIX.COM UNDER THIS LICENSE AGREEMENT DURING THE
      -TWELVE (12) MONTH PERIOD PRECEDING THE DATE THE CAUSE OF ACTION
      -AROSE.

      -SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION OR LIMITATION OF -INCIDENTAL OR CONSEQUENTIAL DAMAGES, SO THE ABOVE EXCLUSION OR -LIMITATION MAY NOT APPLY TO LICENSEE. -

      -

      11. Termination

      -

      -This License Agreement will automatically terminate upon a material -breach of its terms and conditions if not cured within thirty (30) -days of written notice by eGenix.com. Upon termination, Licensee shall -discontinue use and remove all installed copies of the Software. -

      -

      12. Indemnification

      -

      -Licensee hereby agrees to indemnify eGenix.com against and hold -harmless eGenix.com from any claims, lawsuits or other losses that -arise out of Licensee's breach of any provision of this License -Agreement. -

      -

      13. Third Party Rights

      -

      -Any software or documentation in source or binary form provided along -with the Software that is associated with a separate license agreement -is licensed to Licensee under the terms of that license -agreement. This License Agreement does not apply to those portions of -the Software. Copies of the third party licenses are included in the -Software Distribution. -

      -

      14. High Risk Activities

      -

      -The Software is not fault-tolerant and is not designed, manufactured -or intended for use or resale as on-line control equipment in -hazardous environments requiring fail-safe performance, such as in the -operation of nuclear facilities, aircraft navigation or communication -systems, air traffic control, direct life support machines, or weapons -systems, in which the failure of the Software, or any software, tool, -process, or service that was developed using the Software, could lead -directly to death, personal injury, or severe physical or -environmental damage ("High Risk Activities"). -

      -Accordingly, eGenix.com specifically disclaims any express or implied -warranty of fitness for High Risk Activities. -

      -Licensee agrees that eGenix.com will not be liable for any claims or -damages arising from the use of the Software, or any software, tool, -process, or service that was developed using the Software, in such -applications. -

      -

      15. General

      -

      -Nothing in this License Agreement affects any statutory rights of -consumers that cannot be waived or limited by contract. -

      -Nothing in this License Agreement shall be deemed to create any -relationship of agency, partnership, or joint venture between -eGenix.com and Licensee. -

      -If any provision of this License Agreement shall be unlawful, void, or -for any reason unenforceable, such provision shall be modified to the -extent necessary to render it enforceable without losing its intent, -or, if no such modification is possible, be severed from this License -Agreement and shall not affect the validity and enforceability of the -remaining provisions of this License Agreement. -

      -This License Agreement shall be governed by and interpreted in all -respects by the law of Germany, excluding conflict of law -provisions. It shall not be governed by the United Nations Convention -on Contracts for International Sale of Goods. -

      -This License Agreement does not grant permission to use eGenix.com -trademarks or trade names in a trademark sense to endorse or promote -products or services of Licensee, or any third party. -

      -The controlling language of this License Agreement is English. If -Licensee has received a translation into another language, it has been -provided for Licensee's convenience only. -

      -

      16. Agreement

      -

      -By downloading, copying, installing or otherwise using the Software, -Licensee agrees to be bound by the terms and conditions of this -License Agreement. -

      -

      -For questions regarding this License Agreement, please write to: -

      -	  eGenix.com Software, Skills and Services GmbH
      -	  Pastor-Loeh-Str. 48
      -	  D-40764 Langenfeld
      -	  Germany
      -
      - -
      - -

      - The following two sections give examples of the "Proof of - Authorization" for a commercial use license of a product under - this license. - -

      - When you buy such a license, you will receive a signed - "Proof of Authorization" by postal mail within a week or - two. We will also send you the Proof of Authorization Key - by e-mail to acknowledge acceptance of the payment. - -

      - - - - - -
      - -

      EGENIX.COM PROOF OF AUTHORIZATION (Example: CPU License)

      -

      -

      1. License Grant

      -

      -eGenix.com Software, Skills and Services GmbH ("eGenix.com"), having -an office at Pastor-Loeh-Str. 48, D-40764 Langenfeld, Germany, hereby -grants the Individual or Organization ("Licensee") a non-exclusive, -world-wide license to use the software listed below in source or -binary form and its associated documentation ("the Software") under -the terms and conditions of the eGenix.com Commercial License -Agreement Version 1.0.0 and to the extent authorized by this Proof of -Authorization. -

      -

      2. Covered Software

      -
      -   Software Name:		   mxODBC Python ODBC Interface
      -   Software Version:		   Version 2.0.0
      -   Software Distribution:	   mxODBC-2.0.0.zip
      -   Software Distribution MD5 Hash: xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
      -   Operating System:		   any compatible operating system
      -
      -

      3. Authorizations

      -

      -eGenix.com hereby authorizes Licensee to copy, install, compile, -modify and use the Software on the following Installation Targets. -

      -   Installation Targets:	   one (1) CPU
      -
      -Redistribution of the Software is not allowed under this Proof of -Authorization. -

      -

      4. Proof

      -

      -This Proof of Authorization was issued by -

      -

      -	      __________________________________
      -
      -
      -	      Langenfeld, ______________________
      -
      -              Proof of Authorization Key:
      -              xxxx-xxxx-xxxx-xxxx-xxxx-xxxx
      -
      -

      - -

      - -

      - The next section gives an example of a "Developer CPU - License", which allows you to redistribute software built - around or integrating the Software. Please contact sales@eGenix.com for - questions about the redistribution conditions. - -

      - - - - - -
      - -

      EGENIX.COM PROOF OF AUTHORIZATION (Example: Developer License)

      -

      -

      1. License Grant

      -

      -eGenix.com Software, Skills and Services GmbH ("eGenix.com"), having -an office at Pastor-Loeh-Str. 48, D-40764 Langenfeld, Germany, hereby -grants the Individual or Organization ("Licensee") a non-exclusive, -world-wide license to use and distribute the software listed below in -source or binary form and its associated documentation ("the -Software") under the terms and conditions of the eGenix.com Commercial -License Agreement Version 1.0.0 and to the extent authorized by this -Proof of Authorization. -

      -

      2. Covered Software

      -
      -   Software Name:		   mxODBC Python ODBC Interface
      -   Software Version:		   Version 2.0.0
      -   Software Distribution:	   mxODBC-2.0.0.zip
      -   Software Distribution MD5 Hash: xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
      -   Operating System:		   any compatible operating system
      -
      -

      3. Authorizations

      -

      -

      3.1. Application Development

      -

      -eGenix.com hereby authorizes Licensee to copy, install, compile, -modify and use the Software on the following Developer Installation -Targets for the purpose of developing products using the Software as an -integral part. -

      -   Developer Installation Targets: one (1) CPU
      -
      -

      3.2. Redistribution

      -

      -eGenix.com hereby authorizes Licensee to redistribute the Software -bundled with a product developed by Licensee on the Developer -Installation Targets ("the Product") subject to the terms and -conditions of the eGenix.com Commercial License Agreement for -installation and use in combination with the Product on the following -Redistribution Installation Targets, provided that: -

      - 1) Licensee shall not and shall not permit or assist any third - party to sell or distribute the Software as a separate product; -

      - 2) Licensee shall not and shall not permit any third party to -

      - (i) market, sell or distribute the Software to any end user - except subject to the eGenix Commercial License Agreement, -

      - (ii) rent, sell, lease or otherwise transfer the Software or - any part thereof or use it for the benefit of any third party, -

      - (iii) use the Software outside the Product or for any other - purpose not expressly licensed hereunder; -

      - 3) the Product does not provide functions or capabilities similar - to those of the Software itself, i.e. the Product does not - introduce commercial competition for the Software as sold by - eGenix.com. -

      -

      -   Redistribution Installation Targets:	any number of CPUs capable of
      -					running the Product and the
      -					Software
      -
      -

      4. Proof

      -

      -This Proof of Authorization was issued by -

      -

      -	      __________________________________
      -
      -
      -	      Langenfeld, ______________________
      -
      -              Proof of Authorization Key:
      -              xxxx-xxxx-xxxx-xxxx-xxxx-xxxx
      -
      -

      - -

      - -

    - -
    -
    © 2000, Copyright by eGenix.com - Software GmbH, Langenfeld, Germany; All Rights Reserved. mailto: - info@egenix.com -
    - - - diff -Nru simpleparse-2.1.0a1/stt/TextTools/Constants/Sets.py simpleparse-2.2.0/stt/TextTools/Constants/Sets.py --- simpleparse-2.1.0a1/stt/TextTools/Constants/Sets.py 2006-02-18 23:35:17.000000000 +0000 +++ simpleparse-2.2.0/stt/TextTools/Constants/Sets.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,58 +0,0 @@ -# -*- coding: latin-1 -*- - -""" Constants for sets (of characters) - - Copyright (c) 2000, Marc-Andre Lemburg; mailto:mal@lemburg.com - Copyright (c) 2000-2003, eGenix.com Software GmbH; mailto:info@egenix.com - See the documentation for further information on copyrights, - or contact the author. All Rights Reserved. -""" -import string -from simpleparse.stt.TextTools.mxTextTools import CharSet - -# Simple character strings -a2z = 'abcdefghijklmnopqrstuvwxyz' -A2Z = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' -umlaute = 'äöüß' -Umlaute = 'ÄÖÜ' -alpha = A2Z + a2z -german_alpha = A2Z + a2z + umlaute + Umlaute -number = '0123456789' -alphanumeric = alpha + number -white = ' \t\v' -newline = '\r\n' -formfeed = '\f' -whitespace = white + newline + formfeed -any = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377' - -# Precompiled as sets, e.g. 
a2z_set = set(a2z) -a2z_set = '\000\000\000\000\000\000\000\000\000\000\000\000\376\377\377\007\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000' -A2Z_set = '\000\000\000\000\000\000\000\000\376\377\377\007\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000' -alpha_set = '\000\000\000\000\000\000\000\000\376\377\377\007\376\377\377\007\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000' -german_alpha_set = '\000\000\000\000\000\000\000\000\376\377\377\007\376\377\377\007\000\000\000\000\000\000\000\000\020\000@\220\020\000@\020' -number_set = '\000\000\000\000\000\000\377\003\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000' -alphanumeric_set = '\000\000\000\000\000\000\377\003\376\377\377\007\376\377\377\007\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000' -white_set = '\000\002\000\000\001\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000' -newline_set = '\000$\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000' -whitespace_set = '\000&\000\000\001\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000' -nonwhitespace_set = '\377\301\377\377\376\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377' -any_set = '\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377' - -# Compiled as CharSet instances -a2z_charset = CharSet('a-z') -A2Z_charset = CharSet('A-Z') -umlaute_charset = CharSet('äöüß') -Umlaute_charset = CharSet('ÄÖÜ') -alpha_charset = CharSet(A2Z + a2z) -german_alpha_charset = CharSet(A2Z + a2z + umlaute + Umlaute) -number_charset = CharSet('0-9') -alphanumeric_charset = CharSet(alpha + number) -white_charset = CharSet(' \t\v') -newline_charset = CharSet('\r\n') -formfeed_charset = CharSet('\f') -whitespace_charset = CharSet(white + newline + formfeed) -nonwhitespace_charset = CharSet('^' + white + newline + formfeed) -any_charset = CharSet('\000-\377') - -# Clean up -del CharSet, string diff -Nru simpleparse-2.1.0a1/stt/TextTools/Constants/TagTables.py simpleparse-2.2.0/stt/TextTools/Constants/TagTables.py --- simpleparse-2.1.0a1/stt/TextTools/Constants/TagTables.py 2006-02-18 23:35:17.000000000 +0000 +++ simpleparse-2.2.0/stt/TextTools/Constants/TagTables.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,32 +0,0 @@ -""" Constants for writing tag tables - - These are defined in mxte.h and imported here via the C extension. - See the documentation for details about the various constants. - - Copyright (c) 2000, Marc-Andre Lemburg; mailto:mal@lemburg.com - Copyright (c) 2000-2003, eGenix.com Software GmbH; mailto:info@egenix.com - See the documentation for further information on copyrights, - or contact the author. All Rights Reserved. - -""" -### Module init. 
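Editorial note on the deleted Constants/Sets.py above: each precompiled *_set constant is a 256-bit membership bitmap packed into a 32-byte string, with bit (1 << (c & 7)) of byte (c >> 3) set for every character code c in the set; this is the same lookup the MATCH_ALLINSET / MATCH_ISINSET commands perform further down in this diff. A minimal Python sketch of that encoding (the helper names below are illustrative only, not part of the package):

    def charset_bitmap(chars):
        # Pack *chars* into a 32-byte (256-bit) membership bitmap:
        # byte c >> 3 carries bit 1 << (c & 7) for character code c.
        bits = bytearray(32)
        for c in chars:
            code = ord(c)
            bits[code >> 3] |= 1 << (code & 7)
        return bytes(bits)

    def in_charset(bitmap, ch):
        code = ord(ch)
        return bool(bitmap[code >> 3] & (1 << (code & 7)))

    # Rebuilds the equivalent of a2z_set ('\376\377\377\007' in bytes 12-15).
    a2z_bitmap = charset_bitmap('abcdefghijklmnopqrstuvwxyz')
    assert in_charset(a2z_bitmap, 'q') and not in_charset(a2z_bitmap, 'Q')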
- -def _module_init(): - - from simpleparse.stt.TextTools.mxTextTools import mxTextTools - global id2cmd - - id2cmd = {} - - # Fetch symbols from the C extension and add them to this module - ns = globals() - for name, value in vars(mxTextTools).items(): - if name[:7] == '_const_': - cmd = name[7:] - ns[cmd] = value - if value == 0: - id2cmd[0] = 'Fail/Jump' - else: - id2cmd[value] = cmd - -_module_init() diff -Nru simpleparse-2.1.0a1/stt/TextTools/COPYRIGHT simpleparse-2.2.0/stt/TextTools/COPYRIGHT --- simpleparse-2.1.0a1/stt/TextTools/COPYRIGHT 2006-02-18 23:34:33.000000000 +0000 +++ simpleparse-2.2.0/stt/TextTools/COPYRIGHT 1970-01-01 00:00:00.000000000 +0000 @@ -1,19 +0,0 @@ - --------------------------------------------------------------------- - COPYRIGHT NOTICE - --------------------------------------------------------------------- - Copyright (c) 1997-2000, Marc-Andre Lemburg; mailto:mal@lemburg.com - Copyright (c) 2000-2002, eGenix.com Software GmbH; mailto:info@egenix.com - Copyright (c) 2003-2006, Mike Fletcher; mailto:mcfletch@vrplumber.com - - All Rights Reserved. - - This software is protected by copyright and other intellectual - property laws and by international treaties. It may only be used - under the conditions and terms of the eGenix.com Public License - Agreement. - - You should have received a copy of the eGenix.com Public License - Agreement with this software (usually in the file LICENSE located - in the package's or software's main directory). Please write to - licenses@egenix.com to obtain a copy in case you should not have - received a copy. diff -Nru simpleparse-2.1.0a1/stt/TextTools/__init__.py simpleparse-2.2.0/stt/TextTools/__init__.py --- simpleparse-2.1.0a1/stt/TextTools/__init__.py 2006-02-19 00:50:08.000000000 +0000 +++ simpleparse-2.2.0/stt/TextTools/__init__.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,61 +0,0 @@ -""" mxTextTools - A tools package for fast text processing. - - Copyright (c) 2000, Marc-Andre Lemburg; mailto:mal@lemburg.com - Copyright (c) 2000-2003, eGenix.com Software GmbH; mailto:info@egenix.com - Copyright (c) 2003-2006, Mike Fletcher; mailto:mcfletch@vrplumber.com - See the documentation for further information on copyrights, - or contact the author. All Rights Reserved. 
-""" -from TextTools import * -from TextTools import __version__ - -### Make the types pickleable: - -# Shortcuts for pickle (reduces the pickle's length) -def _CS(definition): - return CharSet(definition) -def _TT(definition): - return TagTable(definition) -def _TS(match,translate,algorithm): - return TextSearch(match,translate,algorithm) -# Needed for backward compatibility: -def _BMS(match,translate): - return BMS(match,translate) -def _FS(match,translate): - return FS(match,translate) - -# Module init -class modinit: - - ### Register the types - import copy_reg - - def pickle_CharSet(cs): - return _CS,(cs.definition,) - def pickle_TagTable(tt): - return _TT,(tt.compiled(),) - def pickle_TextSearch(ts): - return _TS,(ts.match, ts.translate, ts.algorithm) - copy_reg.pickle(CharSetType, - pickle_CharSet, - _CS) - copy_reg.pickle(TagTableType, - pickle_TagTable, - _TT) - copy_reg.pickle(TextSearchType, - pickle_TextSearch, - _TS) - if 0: - def pickle_BMS(so): - return _BMS,(so.match,so.translate) - def pickle_FS(so): - return _FS,(so.match,so.translate) - copy_reg.pickle(BMSType, - pickle_BMS, - _BMS) - copy_reg.pickle(FSType, - pickle_FS, - _FS) - - -del modinit diff -Nru simpleparse-2.1.0a1/stt/TextTools/LICENSE simpleparse-2.2.0/stt/TextTools/LICENSE --- simpleparse-2.1.0a1/stt/TextTools/LICENSE 2006-02-18 23:34:33.000000000 +0000 +++ simpleparse-2.2.0/stt/TextTools/LICENSE 1970-01-01 00:00:00.000000000 +0000 @@ -1,106 +0,0 @@ -________________________________________________________________________ - -EGENIX.COM PUBLIC LICENSE AGREEMENT VERSION 1.1.0 -________________________________________________________________________ - -1. Introduction - - This "License Agreement" is between eGenix.com Software, Skills - and Services GmbH ("eGenix.com"), having an office at - Pastor-Loeh-Str. 48, D-40764 Langenfeld, Germany, and the - Individual or Organization ("Licensee") accessing and otherwise - using this software in source or binary form and its associated - documentation ("the Software"). - -2. License - - Subject to the terms and conditions of this eGenix.com Public - License Agreement, eGenix.com hereby grants Licensee a - non-exclusive, royalty-free, world-wide license to reproduce, - analyze, test, perform and/or display publicly, prepare derivative - works, distribute, and otherwise use the Software alone or in any - derivative version, provided, however, that the eGenix.com Public - License Agreement is retained in the Software, or in any - derivative version of the Software prepared by Licensee. - -3. NO WARRANTY - - eGenix.com is making the Software available to Licensee on an "AS - IS" basis. SUBJECT TO ANY STATUTORY WARRANTIES WHICH CAN NOT BE - EXCLUDED, EGENIX.COM MAKES NO REPRESENTATIONS OR WARRANTIES, - EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, - EGENIX.COM MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY - OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT - THE USE OF THE SOFTWARE WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. - -4. LIMITATION OF LIABILITY - - EGENIX.COM SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF - THE SOFTWARE FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES - OR LOSS (INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF - BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS - INFORMATION, OR OTHER PECUNIARY LOSS) AS A RESULT OF USING, - MODIFYING OR DISTRIBUTING THE SOFTWARE, OR ANY DERIVATIVE THEREOF, - EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 
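Editorial note on the deleted stt/TextTools/__init__.py above: its modinit block registers short reduction helpers (_CS, _TT, _TS) with copy_reg so that the C-level CharSet, TagTable and TextSearch objects pickle as a compact (constructor, args) pair. A rough Python 3 sketch of the same copyreg pattern, using a hypothetical Point class as a stand-in for such an extension type (names here are illustrative only):

    import copyreg, pickle

    class Point:
        # stand-in for a C type that pickle cannot handle on its own
        def __init__(self, x, y):
            self.x, self.y = x, y

    def _P(x, y):                    # short reconstructor keeps pickles small
        return Point(x, y)

    def _pickle_point(p):            # reduction: (callable, argument tuple)
        return _P, (p.x, p.y)

    copyreg.pickle(Point, _pickle_point)

    p2 = pickle.loads(pickle.dumps(Point(1, 2)))
    assert (p2.x, p2.y) == (1, 2)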
- - SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION OR LIMITATION OF - INCIDENTAL OR CONSEQUENTIAL DAMAGES, SO THE ABOVE EXCLUSION OR - LIMITATION MAY NOT APPLY TO LICENSEE. - -5. Termination - - This License Agreement will automatically terminate upon a - material breach of its terms and conditions. - -6. Third Party Rights - - Any software or documentation in source or binary form provided - along with the Software that is associated with a separate license - agreement is licensed to Licensee under the terms of that license - agreement. This License Agreement does not apply to those portions - of the Software. Copies of the third party licenses are included - in the Software Distribution. - -7. General - - Nothing in this License Agreement affects any statutory rights of - consumers that cannot be waived or limited by contract. - - Nothing in this License Agreement shall be deemed to create any - relationship of agency, partnership, or joint venture between - eGenix.com and Licensee. - - If any provision of this License Agreement shall be unlawful, - void, or for any reason unenforceable, such provision shall be - modified to the extent necessary to render it enforceable without - losing its intent, or, if no such modification is possible, be - severed from this License Agreement and shall not affect the - validity and enforceability of the remaining provisions of this - License Agreement. - - This License Agreement shall be governed by and interpreted in all - respects by the law of Germany, excluding conflict of law - provisions. It shall not be governed by the United Nations - Convention on Contracts for International Sale of Goods. - - This License Agreement does not grant permission to use eGenix.com - trademarks or trade names in a trademark sense to endorse or - promote products or services of Licensee, or any third party. - - The controlling language of this License Agreement is English. If - Licensee has received a translation into another language, it has - been provided for Licensee's convenience only. - -8. Agreement - - By downloading, copying, installing or otherwise using the - Software, Licensee agrees to be bound by the terms and conditions - of this License Agreement. - - - For question regarding this License Agreement, please write to: - - eGenix.com Software, Skills and Services GmbH - Pastor-Loeh-Str. 48 - D-40764 Langenfeld - Germany diff -Nru simpleparse-2.1.0a1/stt/TextTools/Makefile.pkg simpleparse-2.2.0/stt/TextTools/Makefile.pkg --- simpleparse-2.1.0a1/stt/TextTools/Makefile.pkg 2006-02-18 23:34:33.000000000 +0000 +++ simpleparse-2.2.0/stt/TextTools/Makefile.pkg 1970-01-01 00:00:00.000000000 +0000 @@ -1,37 +0,0 @@ -all: compile install - -# Compile target. This should setup the package and compile it -# into a state ready for installation. - -compile: - cd mxTextTools; \ - if ! test -f Makefile; then $(MAKE) -f Makefile.pre.in boot; fi; \ - $(MAKE) - -# Install target. This should do the install step. If the package -# needs no further installation step (i.e. the extension is not -# needed by other extension modules), then you can leave this target -# blank. - -install: - cd mxTextTools; \ - $(MAKE) install - -# Test target. Should do some testing of the extension. Writing -# something like 'ok' or 'works' and the end of a successful run. - -test: - cd mxTextTools; \ - python test.py - -# Documentation target. Use this to have the documentation for -# an extension generated at the user. - -doc: - # ??? 
- -# Cleanup before distributing the package -# -distclean: - cd mxTextTools; \ - $(MAKE) distclean diff -Nru simpleparse-2.1.0a1/stt/TextTools/mxTextTools/highcommands.h simpleparse-2.2.0/stt/TextTools/mxTextTools/highcommands.h --- simpleparse-2.1.0a1/stt/TextTools/mxTextTools/highcommands.h 2006-02-18 23:40:03.000000000 +0000 +++ simpleparse-2.2.0/stt/TextTools/mxTextTools/highcommands.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,229 +0,0 @@ -/* non-recursive high-level commands - - The contract here is: - - The commands may alter any of the tag-specific variables - - errors may be indicated if encountered in childReturnCode and the error* variables - -*/ - - case MATCH_SWORDSTART: - case MATCH_SWORDEND: - case MATCH_SFINDWORD: - /* these items basically follow the low-level contract, with the - only exception being that MATCH_SFINDWORD will change childStart - */ - { - int wordstart, wordend; - int returnCode; - - DPRINTF("\nsWordStart/End/sFindWord :\n" - " in string = '%.40s'\n",text+childPosition); - childStart = childPosition; - returnCode = TE_SEARCHAPI( - match, - text, - childStart, - sliceright, - &wordstart, - &wordend - ); - if (returnCode < 0) { - childReturnCode = ERROR_CODE; - errorType = PyExc_SystemError; - errorMessage = PyString_FromFormat( - "Search-object search returned value < 0 (%i): probable bug in text processing engine", - returnCode - ); - } else if (returnCode == 0) { - /* not matched */ - DPRINTF(" (no success)\n"); - childReturnCode = FAILURE_CODE; - } else { - /* matched, adjust childPosition according to the word start/end/find requirements */ - if (command == MATCH_SWORDSTART) { - childPosition = wordstart; - } else { - childPosition = wordend; - } - if (command == MATCH_SFINDWORD) { - /* XXX logic problem with lookahead - should it reset to real childStart or - the fake one created here? */ - childStart = wordstart; - } - DPRINTF(" [%i:%i] (matched and remembered this slice)\n", - childStart,childPosition); - } - break; - } - - case MATCH_LOOP: - /* No clue what this is supposed to do, real surprising if it works... - - */ - DPRINTF("\nLoop: pre loop counter = %i\n",loopcount); - - if (loopcount > 0) { - /* we are inside a loop */ - loopcount--; - } else if (loopcount < 0) { - /* starting a new loop */ - if (PyInt_Check(match)) { - loopcount = PyInt_AS_LONG(match); - loopstart = childPosition; - } else { - childReturnCode = ERROR_CODE; - errorType = PyExc_TypeError; - errorMessage = PyString_FromFormat( - "Tag Table entry %i: expected an integer (command=Loop) got a %.50s", - index, - match->ob_type->tp_name - ); - } - } - if (childReturnCode == NULL_CODE ) { - - if (loopcount == 0) { - /* finished loop */ - loopcount = -1; - } - if (loopstart == childPosition) { - /* not matched */ - childReturnCode = FAILURE_CODE; - } else { - childReturnCode = SUCCESS_CODE; - /* on success, add match from start of the whole loop to end of current iteration? - - Would be really good if I had a clue what this is supposed to do :) . 
- */ - childStart = loopstart; - } - DPRINTF("\nloop: post loop counter = %i\n",loopcount); - } - break; - - case MATCH_LOOPCONTROL: - - DPRINTF("\nLoopControl: loop counter = %i, " - "setting it to = %li\n", - loopcount,PyInt_AS_LONG(match)); - - loopcount = PyInt_AS_LONG(match); - break; - - case MATCH_CALL: - case MATCH_CALLARG: - /* call and callarg actually follow the low-level contract */ - - { - PyObject *fct = NULL; - int argc = -1; - - if (!PyTuple_Check(match)) { - argc = 0; - fct = match; - } else { - argc = PyTuple_GET_SIZE(match) - 1; - if (argc < 0) { - /* how is this even possible? */ - childReturnCode = ERROR_CODE; - errorType = PyExc_TypeError; - errorMessage = PyString_FromFormat( - "Tag Table entry %i: " - "expected a tuple (fct,arg0,arg1,...)" - "(command=CallArg)", - index - ); - } else { - fct = PyTuple_GET_ITEM(match,0); - } - } - - if (childReturnCode == NULL_CODE && PyCallable_Check(fct)) { - PyObject *args; - register PyObject *w; - register int argIndex; - - DPRINTF("\nCall[Arg] :\n"); - - childStart = childPosition; - - /* Build args = (textobj,childStart,sliceright[,arg0,arg1,...]) */ - args = PyTuple_New(3 + argc); - if (!args) { - childReturnCode = ERROR_CODE; - errorType = PyExc_SystemError; - errorMessage = PyString_FromFormat( - "Unable to create argument tuple for CallArgs command at index %i", - index - ); - } else { - Py_INCREF(textobj); - PyTuple_SET_ITEM(args,0,textobj); - w = PyInt_FromLong(childStart); - if (!w){ - childReturnCode = ERROR_CODE; - errorType = PyExc_SystemError; - errorMessage = PyString_FromFormat( - "Unable to convert an integer %i to a Python Integer", - childStart - ); - } else { - PyTuple_SET_ITEM(args,1,w); - w = PyInt_FromLong(sliceright); - if (!w) { - childReturnCode = ERROR_CODE; - errorType = PyExc_SystemError; - errorMessage = PyString_FromFormat( - "Unable to convert an integer %i to a Python Integer", - sliceright - ); - } else { - PyTuple_SET_ITEM(args,2,w); - for (argIndex = 0; argIndex < argc; argIndex++) { - w = PyTuple_GET_ITEM(match,argIndex + 1); - Py_INCREF(w); - PyTuple_SET_ITEM(args,3 + argIndex,w); - } - /* now actually call the object */ - w = PyEval_CallObject(fct,args); - Py_DECREF(args); - if (w == NULL) { - childReturnCode = ERROR_CODE; - /* child's error should be allowed to propagate */ - } else if (!PyInt_Check(w)) { - childReturnCode = ERROR_CODE; - errorType = PyExc_TypeError; - errorMessage = PyString_FromFormat( - "Tag Table entry %i: matching function has to return an integer, returned a %.50s", - index, - w->ob_type->tp_name - ); - } else { - childPosition = PyInt_AS_LONG(w); - Py_DECREF(w); - - if (childStart == childPosition) { - /* not matched */ - DPRINTF(" (no success)\n"); - childReturnCode = FAILURE_CODE; - } - } - } - } - } - } else { - childReturnCode = ERROR_CODE; - errorType = PyExc_TypeError; - errorMessage = PyString_FromFormat( - "Tag Table entry %i: " - "expected a callable object, got a %.50s" - "(command=Call[Arg])", - index, - fct->ob_type->tp_name - ); - } - break; - } diff -Nru simpleparse-2.1.0a1/stt/TextTools/mxTextTools/__init__.py simpleparse-2.2.0/stt/TextTools/mxTextTools/__init__.py --- simpleparse-2.1.0a1/stt/TextTools/mxTextTools/__init__.py 2006-02-19 00:50:27.000000000 +0000 +++ simpleparse-2.2.0/stt/TextTools/mxTextTools/__init__.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,23 +0,0 @@ -""" mxTextTools - A tools package for fast text processing. 
- - Copyright (c) 2000, Marc-Andre Lemburg; mailto:mal@lemburg.com - Copyright (c) 2000-2003, eGenix.com Software GmbH; mailto:info@egenix.com - Copyright (c) 2003-2006, Mike Fletcher; mailto:mcfletch@vrplumber.com - See the documentation for further information on copyrights, - or contact the author. All Rights Reserved. -""" -from simpleparse.stt.TextTools.mxTextTools.mxTextTools import * -from simpleparse.stt.TextTools.mxTextTools.mxTextTools import __version__ - -# To maintain backward compatibility: -BMS = TextSearch -BMSType = TextSearchType -try: - TextSearch('',None,FASTSEARCH) -except: - FS = BMS - FSType = BMS -else: - def FS(match, translate=None): - return TextSearch(match, translate, FASTSEARCH) - FSType = TextSearchType diff -Nru simpleparse-2.1.0a1/stt/TextTools/mxTextTools/lowlevelcommands.h simpleparse-2.2.0/stt/TextTools/mxTextTools/lowlevelcommands.h --- simpleparse-2.1.0a1/stt/TextTools/mxTextTools/lowlevelcommands.h 2006-02-18 23:40:03.000000000 +0000 +++ simpleparse-2.2.0/stt/TextTools/mxTextTools/lowlevelcommands.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,325 +0,0 @@ -/* Low-level matching commands code fragment - - The contract here is: - - all commands move forward through the buffer - - failure to move forward indicates failure of the tag - - moving forward indicates success of the tag - - errors may be indicated if encountered in childReturnCode and the error* variables - - only childPosition should be updated otherwise - -*/ -TE_CHAR *m = TE_STRING_AS_STRING(match); -if (m == NULL) { - childReturnCode = ERROR_CODE; - errorType = PyExc_TypeError; - errorMessage = PyString_FromFormat( - "Low-level command (%i) argument in entry %i couldn't be converted to a string object, is a %.50s", - command, - index, - textobj->ob_type->tp_name - - ); -} else { - -switch (command) { - - case MATCH_ALLIN: - - { - register int ml = TE_STRING_GET_SIZE(match); - register TE_CHAR *tx = &text[childPosition]; - - DPRINTF("\nAllIn :\n" - " looking for = '%.40s'\n" - " in string = '%.40s'\n",m,tx); - - if (ml > 1) { - for (; childPosition < sliceright; tx++, childPosition++) { - register int j; - register TE_CHAR *mj = m; - register TE_CHAR ctx = *tx; - for (j=0; j < ml && ctx != *mj; mj++, j++) ; - if (j == ml) break; - } - } else if (ml == 1) { - /* one char only: use faster variant: */ - for (; childPosition < sliceright && *tx == *m; tx++, childPosition++) ; - } - break; - } - - case MATCH_ALLNOTIN: - - { - register int ml = TE_STRING_GET_SIZE(match); - register TE_CHAR *tx = &text[childPosition]; - - DPRINTF("\nAllNotIn :\n" - " looking for = '%.40s'\n" - " not in string = '%.40s'\n",m,tx); - - if (ml != 1) { - for (; childPosition < sliceright; tx++, childPosition++) { - register int j; - register TE_CHAR *mj = m; - register TE_CHAR ctx = *tx; - for (j=0; j < ml && ctx != *mj; mj++, j++) ; - if (j != ml) break; - } - } else { - /* one char only: use faster variant: */ - for (; childPosition < sliceright && *tx != *m; tx++, childPosition++) ; - } - break; - } - - case MATCH_IS: - - { - DPRINTF("\nIs :\n" - " looking for = '%.40s'\n" - " in string = '%.40s'\n",m,text+childPosition); - - if (childPosition < sliceright && *(&text[childPosition]) == *m) { - childPosition++; - } - break; - } - - case MATCH_ISIN: - - { - register int ml = TE_STRING_GET_SIZE(match); - register TE_CHAR ctx = text[childPosition]; - - DPRINTF("\nIsIn :\n" - " looking for = '%.40s'\n" - " in string = '%.40s'\n",m,text+childPosition); - - if (ml > 0 && childPosition < sliceright) { - register int j; - 
register TE_CHAR *mj = m; - for (j=0; j < ml && ctx != *mj; mj++, j++) ; - if (j != ml) childPosition++; - } - - break; - } - - case MATCH_ISNOTIN: - - { - register int ml = TE_STRING_GET_SIZE(match); - register TE_CHAR ctx = text[childPosition]; - - DPRINTF("\nIsNotIn :\n" - " looking for = '%.40s'\n" - " not in string = '%.40s'\n",m,text+childPosition); - - if (ml > 0 && childPosition < sliceright) { - register int j; - register TE_CHAR *mj = m; - for (j=0; j < ml && ctx != *mj; mj++, j++) ; - if (j == ml) childPosition++; - } - else - childPosition++; - - break; - } - - case MATCH_WORD: - - { - int ml1 = TE_STRING_GET_SIZE(match) - 1; - register TE_CHAR *tx = &text[childPosition + ml1]; - register int j = ml1; - register TE_CHAR *mj = &m[j]; - - DPRINTF("\nWord :\n" - " looking for = '%.40s'\n" - " in string = '%.40s'\n",m,&text[childPosition]); - - if (childPosition+ml1 >= sliceright) break; - - /* compare from right to left */ - for (; j >= 0 && *tx == *mj; - tx--, mj--, j--) ; - - if (j >= 0) /* not matched */ - childPosition = startPosition; /* reset */ - else - childPosition += ml1 + 1; - break; - } - - case MATCH_WORDSTART: - case MATCH_WORDEND: - - { - int ml1 = TE_STRING_GET_SIZE(match) - 1; - - if (ml1 >= 0) { - register TE_CHAR *tx = &text[childPosition]; - - DPRINTF("\nWordStart/End :\n" - " looking for = '%.40s'\n" - " in string = '%.40s'\n",m,tx); - - /* Brute-force method; from right to left */ - for (;;) { - register int j = ml1; - register TE_CHAR *mj = &m[j]; - - if (childPosition+j >= sliceright) { - /* reached eof: no match, rewind */ - childPosition = startPosition; - break; - } - - /* scan from right to left */ - for (tx += j; j >= 0 && *tx == *mj; - tx--, mj--, j--) ; - /* - DPRINTF("match text[%i+%i]: %c == %c\n", - childPosition,j,*tx,*mj); - */ - - if (j < 0) { - /* found */ - if (command == MATCH_WORDEND) childPosition += ml1 + 1; - break; - } - /* not found: rewind and advance one char */ - tx -= j - 1; - childPosition++; - } - } - - break; - } - -#if (TE_TABLETYPE == MXTAGTABLE_STRINGTYPE) - - /* Note: These two only work for 8-bit set strings. 
*/ - case MATCH_ALLINSET: - - { - register TE_CHAR *tx = &text[childPosition]; - unsigned char *m = PyString_AS_STRING(match); - - DPRINTF("\nAllInSet :\n" - " looking for = set at 0x%lx\n" - " in string = '%.40s'\n",(long)match,tx); - - for (; - childPosition < sliceright && - (m[((unsigned char)*tx) >> 3] & - (1 << (*tx & 7))) > 0; - tx++, childPosition++) ; - - break; - } - - case MATCH_ISINSET: - - { - register TE_CHAR *tx = &text[childPosition]; - unsigned char *m = PyString_AS_STRING(match); - - DPRINTF("\nIsInSet :\n" - " looking for = set at 0x%lx\n" - " in string = '%.40s'\n",(long)match,tx); - - if (childPosition < sliceright && - (m[((unsigned char)*tx) >> 3] & - (1 << (*tx & 7))) > 0) - childPosition++; - - break; - } - -#endif - - case MATCH_ALLINCHARSET: - - { - int matching; - - DPRINTF("\nAllInCharSet :\n" - " looking for = CharSet at 0x%lx\n" - " in string = '%.40s'\n", - (long)match, &text[childPosition]); - - matching = mxCharSet_Match(match, - textobj, - childPosition, - sliceright, - 1); - if (matching < 0) { - childReturnCode = ERROR_CODE; - errorType = PyExc_SystemError; - errorMessage = PyString_FromFormat( - "Character set match returned value < 0 (%i): probable bug in text processing engine", - matching - ); - } else { - childPosition += matching; - } - break; - } - - case MATCH_ISINCHARSET: - - { - int test; - - DPRINTF("\nIsInCharSet :\n" - " looking for = CharSet at 0x%lx\n" - " in string = '%.40s'\n", - (long)match, &text[childPosition]); - -#if (TE_TABLETYPE == MXTAGTABLE_STRINGTYPE) - test = mxCharSet_ContainsChar(match, text[childPosition]); -#else - test = mxCharSet_ContainsUnicodeChar(match, text[childPosition]); -#endif - if (test < 0) { - childReturnCode = ERROR_CODE; - errorType = PyExc_SystemError; - errorMessage = PyString_FromFormat( - "Character set match returned value < 0 (%i): probable bug in text processing engine", - test - ); - } else if (test) { - childPosition++; - } - break; - } - default: - { - childReturnCode = ERROR_CODE; - errorType = PyExc_ValueError; - errorMessage = PyString_FromFormat( - "Unrecognised Low-Level command code %i, maximum low-level code is %i", - command, - MATCH_MAX_LOWLEVEL - ); - } -/* end of the switch, this child is finished */ -} -} /* end of the wrapping if-check */ - -/* simple determination for these commands (hence calling them low-level) */ -if (childReturnCode == NULL_CODE) { - if (childPosition > childStart) { - childReturnCode = SUCCESS_CODE; - } else { - childReturnCode = FAILURE_CODE; - } -} diff -Nru simpleparse-2.1.0a1/stt/TextTools/mxTextTools/Makefile.pre.in simpleparse-2.2.0/stt/TextTools/mxTextTools/Makefile.pre.in --- simpleparse-2.1.0a1/stt/TextTools/mxTextTools/Makefile.pre.in 2006-02-18 23:40:03.000000000 +0000 +++ simpleparse-2.2.0/stt/TextTools/mxTextTools/Makefile.pre.in 1970-01-01 00:00:00.000000000 +0000 @@ -1,305 +0,0 @@ -# Universal Unix Makefile for Python extensions -# ============================================= - -# Short Instructions -# ------------------ - -# 1. Build and install Python (1.5 or newer). -# 2. "make -f Makefile.pre.in boot" -# 3. "make" -# You should now have a shared library. - -# Long Instructions -# ----------------- - -# Build *and install* the basic Python 1.5 distribution. See the -# Python README for instructions. (This version of Makefile.pre.in -# only withs with Python 1.5, alpha 3 or newer.) - -# Create a file Setup.in for your extension. This file follows the -# format of the Modules/Setup.dist file; see the instructions there. 
-# For a simple module called "spam" on file "spammodule.c", it can -# contain a single line: -# spam spammodule.c -# You can build as many modules as you want in the same directory -- -# just have a separate line for each of them in the Setup.in file. - -# If you want to build your extension as a shared library, insert a -# line containing just the string -# *shared* -# at the top of your Setup.in file. - -# Note that the build process copies Setup.in to Setup, and then works -# with Setup. It doesn't overwrite Setup when Setup.in is changed, so -# while you're in the process of debugging your Setup.in file, you may -# want to edit Setup instead, and copy it back to Setup.in later. -# (All this is done so you can distribute your extension easily and -# someone else can select the modules they actually want to build by -# commenting out lines in the Setup file, without editing the -# original. Editing Setup is also used to specify nonstandard -# locations for include or library files.) - -# Copy this file (Misc/Makefile.pre.in) to the directory containing -# your extension. - -# Run "make -f Makefile.pre.in boot". This creates Makefile -# (producing Makefile.pre and sedscript as intermediate files) and -# config.c, incorporating the values for sys.prefix, sys.exec_prefix -# and sys.version from the installed Python binary. For this to work, -# the python binary must be on your path. If this fails, try -# make -f Makefile.pre.in Makefile VERSION=1.5 installdir= -# where is the prefix used to install Python for installdir -# (and possibly similar for exec_installdir=). - -# Note: "make boot" implies "make clobber" -- it assumes that when you -# bootstrap you may have changed platforms so it removes all previous -# output files. - -# If you are building your extension as a shared library (your -# Setup.in file starts with *shared*), run "make" or "make sharedmods" -# to build the shared library files. If you are building a statically -# linked Python binary (the only solution of your platform doesn't -# support shared libraries, and sometimes handy if you want to -# distribute or install the resulting Python binary), run "make -# python". - -# Note: Each time you edit Makefile.pre.in or Setup, you must run -# "make Makefile" before running "make". - -# Hint: if you want to use VPATH, you can start in an empty -# subdirectory and say (e.g.): -# make -f ../Makefile.pre.in boot srcdir=.. VPATH=.. - - -# === Bootstrap variables (edited through "make boot") === - -# The prefix used by "make inclinstall libainstall" of core python -installdir= /usr/local - -# The exec_prefix used by the same -exec_installdir=$(installdir) - -# Source directory and VPATH in case you want to use VPATH. -# (You will have to edit these two lines yourself -- there is no -# automatic support as the Makefile is not generated by -# config.status.) -srcdir= . -VPATH= . - -# === Variables that you may want to customize (rarely) === - -# (Static) build target -TARGET= python - -# Installed python binary (used only by boot target) -PYTHON= python - -# Add more -I and -D options here -CFLAGS= $(OPT) -I$(INCLUDEPY) -I$(EXECINCLUDEPY) $(DEFS) - -# These two variables can be set in Setup to merge extensions. -# See example[23]. 
-BASELIB= -BASESETUP= - -# === Variables set by makesetup === - -MODOBJS= _MODOBJS_ -MODLIBS= _MODLIBS_ - -# === Definitions added by makesetup === - -# === Variables from configure (through sedscript) === - -VERSION= @VERSION@ -CC= @CC@ -LINKCC= @LINKCC@ -SGI_ABI= @SGI_ABI@ -OPT= @OPT@ -LDFLAGS= @LDFLAGS@ -LDLAST= @LDLAST@ -DEFS= @DEFS@ -LIBS= @LIBS@ -LIBM= @LIBM@ -LIBC= @LIBC@ -RANLIB= @RANLIB@ -MACHDEP= @MACHDEP@ -SO= @SO@ -LDSHARED= @LDSHARED@ -CCSHARED= @CCSHARED@ -LINKFORSHARED= @LINKFORSHARED@ -#@SET_CCC@ - -# Install prefix for architecture-independent files -prefix= /usr/local - -# Install prefix for architecture-dependent files -exec_prefix= $(prefix) - -# Uncomment the following two lines for AIX -#LINKCC= $(LIBPL)/makexp_aix $(LIBPL)/python.exp "" $(LIBRARY); $(PURIFY) $(CC) -#LDSHARED= $(LIBPL)/ld_so_aix $(CC) -bI:$(LIBPL)/python.exp - -# === Fixed definitions === - -# Shell used by make (some versions default to the login shell, which is bad) -SHELL= /bin/sh - -# Expanded directories -BINDIR= $(exec_installdir)/bin -LIBDIR= $(exec_prefix)/lib -MANDIR= $(installdir)/man -INCLUDEDIR= $(installdir)/include -SCRIPTDIR= $(prefix)/lib - -# Detailed destination directories -BINLIBDEST= $(LIBDIR)/python$(VERSION) -LIBDEST= $(SCRIPTDIR)/python$(VERSION) -INCLUDEPY= $(INCLUDEDIR)/python$(VERSION) -EXECINCLUDEPY= $(exec_installdir)/include/python$(VERSION) -LIBP= $(exec_installdir)/lib/python$(VERSION) -DESTSHARED= $(BINLIBDEST)/site-packages - -LIBPL= $(LIBP)/config - -PYTHONLIBS= $(LIBPL)/libpython$(VERSION).a - -MAKESETUP= $(LIBPL)/makesetup -MAKEFILE= $(LIBPL)/Makefile -CONFIGC= $(LIBPL)/config.c -CONFIGCIN= $(LIBPL)/config.c.in -SETUP= $(LIBPL)/Setup.local $(LIBPL)/Setup - -SYSLIBS= $(LIBM) $(LIBC) - -ADDOBJS= $(LIBPL)/python.o config.o - -# Portable install script (configure doesn't always guess right) -INSTALL= $(LIBPL)/install-sh -c -# Shared libraries must be installed with executable mode on some systems; -# rather than figuring out exactly which, we always give them executable mode. -# Also, making them read-only seems to be a good idea... -INSTALL_SHARED= ${INSTALL} -m 555 - -# === Fixed rules === - -# Default target. This builds shared libraries only -default: sharedmods - -# Build everything -all: static sharedmods - -# Build shared libraries from our extension modules -sharedmods: $(SHAREDMODS) - -# Build a static Python binary containing our extension modules -static: $(TARGET) -$(TARGET): $(ADDOBJS) lib.a $(PYTHONLIBS) Makefile $(BASELIB) - $(LINKCC) $(LDFLAGS) $(LINKFORSHARED) \ - $(ADDOBJS) lib.a $(PYTHONLIBS) \ - $(LINKPATH) $(BASELIB) $(MODLIBS) $(LIBS) $(SYSLIBS) \ - -o $(TARGET) $(LDLAST) - -install: sharedmods - if test ! 
-d $(DESTSHARED) ; then \ - mkdir $(DESTSHARED) ; else true ; fi - -for i in X $(SHAREDMODS); do \ - if test $$i != X; \ - then $(INSTALL_SHARED) $$i $(DESTSHARED)/$$i; \ - fi; \ - done - -# Build the library containing our extension modules -lib.a: $(MODOBJS) - -rm -f lib.a - ar cr lib.a $(MODOBJS) - -$(RANLIB) lib.a - -# This runs makesetup *twice* to use the BASESETUP definition from Setup -config.c Makefile: Makefile.pre Setup $(BASESETUP) $(MAKESETUP) - $(MAKESETUP) \ - -m Makefile.pre -c $(CONFIGCIN) Setup -n $(BASESETUP) $(SETUP) - $(MAKE) -f Makefile do-it-again - -# Internal target to run makesetup for the second time -do-it-again: - $(MAKESETUP) \ - -m Makefile.pre -c $(CONFIGCIN) Setup -n $(BASESETUP) $(SETUP) - -# Make config.o from the config.c created by makesetup -config.o: config.c - $(CC) $(CFLAGS) -c config.c - -# Setup is copied from Setup.in *only* if it doesn't yet exist -Setup: - cp $(srcdir)/Setup.in Setup - -# Make the intermediate Makefile.pre from Makefile.pre.in -Makefile.pre: Makefile.pre.in sedscript - sed -f sedscript $(srcdir)/Makefile.pre.in >Makefile.pre - -# Shortcuts to make the sed arguments on one line -P=prefix -E=exec_prefix -H=Generated automatically from Makefile.pre.in by sedscript. -L=LINKFORSHARED - -# Make the sed script used to create Makefile.pre from Makefile.pre.in -sedscript: $(MAKEFILE) - sed -n \ - -e '1s/.*/1i\\/p' \ - -e '2s%.*%# $H%p' \ - -e '/^VERSION=/s/^VERSION=[ ]*\(.*\)/s%@VERSION[@]%\1%/p' \ - -e '/^CC=/s/^CC=[ ]*\(.*\)/s%@CC[@]%\1%/p' \ - -e '/^CCC=/s/^CCC=[ ]*\(.*\)/s%#@SET_CCC[@]%CCC=\1%/p' \ - -e '/^LINKCC=/s/^LINKCC=[ ]*\(.*\)/s%@LINKCC[@]%\1%/p' \ - -e '/^OPT=/s/^OPT=[ ]*\(.*\)/s%@OPT[@]%\1%/p' \ - -e '/^LDFLAGS=/s/^LDFLAGS=[ ]*\(.*\)/s%@LDFLAGS[@]%\1%/p' \ - -e '/^LDLAST=/s/^LDLAST=[ ]*\(.*\)/s%@LDLAST[@]%\1%/p' \ - -e '/^DEFS=/s/^DEFS=[ ]*\(.*\)/s%@DEFS[@]%\1%/p' \ - -e '/^LIBS=/s/^LIBS=[ ]*\(.*\)/s%@LIBS[@]%\1%/p' \ - -e '/^LIBM=/s/^LIBM=[ ]*\(.*\)/s%@LIBM[@]%\1%/p' \ - -e '/^LIBC=/s/^LIBC=[ ]*\(.*\)/s%@LIBC[@]%\1%/p' \ - -e '/^RANLIB=/s/^RANLIB=[ ]*\(.*\)/s%@RANLIB[@]%\1%/p' \ - -e '/^MACHDEP=/s/^MACHDEP=[ ]*\(.*\)/s%@MACHDEP[@]%\1%/p' \ - -e '/^SO=/s/^SO=[ ]*\(.*\)/s%@SO[@]%\1%/p' \ - -e '/^LDSHARED=/s/^LDSHARED=[ ]*\(.*\)/s%@LDSHARED[@]%\1%/p' \ - -e '/^CCSHARED=/s/^CCSHARED=[ ]*\(.*\)/s%@CCSHARED[@]%\1%/p' \ - -e '/^SGI_ABI=/s/^SGI_ABI=[ ]*\(.*\)/s%@SGI_ABI[@]%\1%/p' \ - -e '/^$L=/s/^$L=[ ]*\(.*\)/s%@$L[@]%\1%/p' \ - -e '/^$P=/s/^$P=\(.*\)/s%^$P=.*%$P=\1%/p' \ - -e '/^$E=/s/^$E=\(.*\)/s%^$E=.*%$E=\1%/p' \ - $(MAKEFILE) >sedscript - echo "/^#@SET_CCC@/d" >>sedscript - echo "/^installdir=/s%=.*%= $(installdir)%" >>sedscript - echo "/^exec_installdir=/s%=.*%=$(exec_installdir)%" >>sedscript - echo "/^srcdir=/s%=.*%= $(srcdir)%" >>sedscript - echo "/^VPATH=/s%=.*%= $(VPATH)%" >>sedscript - echo "/^LINKPATH=/s%=.*%= $(LINKPATH)%" >>sedscript - echo "/^BASELIB=/s%=.*%= $(BASELIB)%" >>sedscript - echo "/^BASESETUP=/s%=.*%= $(BASESETUP)%" >>sedscript - -# Bootstrap target -boot: clobber - VERSION=`$(PYTHON) -c "import sys; print sys.version[:3]"`; \ - installdir=`$(PYTHON) -c "import sys; print sys.prefix"`; \ - exec_installdir=`$(PYTHON) -c "import sys; print sys.exec_prefix"`; \ - $(MAKE) -f $(srcdir)/Makefile.pre.in VPATH=$(VPATH) srcdir=$(srcdir) \ - VERSION=$$VERSION \ - installdir=$$installdir \ - exec_installdir=$$exec_installdir \ - Makefile - -# Handy target to remove intermediate files and backups -clean: - -rm -f *.o *~ - -# Handy target to remove everything that is easily regenerated -clobber: clean - -rm -f *.a 
tags TAGS config.c Makefile.pre $(TARGET) sedscript - -rm -f *.so *.sl so_locations - - -# Handy target to remove everything you don't want to distribute -distclean: clobber - -rm -f Makefile Setup diff -Nru simpleparse-2.1.0a1/stt/TextTools/mxTextTools/mcfpyapi.h simpleparse-2.2.0/stt/TextTools/mxTextTools/mcfpyapi.h --- simpleparse-2.1.0a1/stt/TextTools/mxTextTools/mcfpyapi.h 2006-02-18 23:40:03.000000000 +0000 +++ simpleparse-2.2.0/stt/TextTools/mxTextTools/mcfpyapi.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,219 +0,0 @@ -/* Marc-Andre's Hex version determination code */ -#ifndef PY_VERSION_HEX -# if PYTHON_API_VERSION == 1007 -# define PY_VERSION_HEX 0x010500F0 -# endif -# if PYTHON_API_VERSION == 1006 -# define PY_VERSION_HEX 0x010400F0 -# endif -# if PYTHON_API_VERSION < 1006 -# define PY_VERSION_HEX 0 -# endif -#endif - - -#if PY_HEX_VERSION < 0x02020000 -/* Python 2.2 features backported to earlier Python versions */ - -#ifndef PYSTRING_FROMFORMAT_BACKPORT -#define PYSTRING_FROMFORMAT_BACKPORT -/* PyString_FromFormat back-porting code - - There are no docs for when PyString_FromFormat shows up that I can see, - appears to be Python version 2.2.0 - - This PyString_FromFormat back-porting code is from Python 2.2.1: - Copyright (c) 2001, 2002 Python Software Foundation. - All Rights Reserved. - - Copyright (c) 2000 BeOpen.com. - All Rights Reserved. - - Copyright (c) 1995-2001 Corporation for National Research Initiatives. - All Rights Reserved. - - Copyright (c) 1991-1995 Stichting Mathematisch Centrum, Amsterdam. - All Rights Reserved. - -*/ - - -#include -PyObject * -PyString_FromFormatV(const char *format, va_list vargs) -{ - va_list count; - int n = 0; - const char* f; - char *s; - PyObject* string; - -#ifdef VA_LIST_IS_ARRAY - memcpy(count, vargs, sizeof(va_list)); -#else - count = vargs; -#endif - /* step 1: figure out how large a buffer we need */ - for (f = format; *f; f++) { - if (*f == '%') { - const char* p = f; - while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f))) - ; - - /* skip the 'l' in %ld, since it doesn't change the - width. although only %d is supported (see - "expand" section below), others can be easily - added */ - if (*f == 'l' && *(f+1) == 'd') - ++f; - - switch (*f) { - case 'c': - (void)va_arg(count, int); - /* fall through... */ - case '%': - n++; - break; - case 'd': case 'i': case 'x': - (void) va_arg(count, int); - /* 20 bytes is enough to hold a 64-bit - integer. Decimal takes the most space. - This isn't enough for octal. */ - n += 20; - break; - case 's': - s = va_arg(count, char*); - n += strlen(s); - break; - case 'p': - (void) va_arg(count, int); - /* maximum 64-bit pointer representation: - * 0xffffffffffffffff - * so 19 characters is enough. - * XXX I count 18 -- what's the extra for? - */ - n += 19; - break; - default: - /* if we stumble upon an unknown - formatting code, copy the rest of - the format string to the output - string. (we cannot just skip the - code, since there's no way to know - what's in the argument list) */ - n += strlen(p); - goto expand; - } - } else - n++; - } - expand: - /* step 2: fill the buffer */ - /* Since we've analyzed how much space we need for the worst case, - use sprintf directly instead of the slower PyOS_snprintf. 
*/ - string = PyString_FromStringAndSize(NULL, n); - if (!string) - return NULL; - - s = PyString_AsString(string); - - for (f = format; *f; f++) { - if (*f == '%') { - const char* p = f++; - int i, longflag = 0; - /* parse the width.precision part (we're only - interested in the precision value, if any) */ - n = 0; - while (isdigit(Py_CHARMASK(*f))) - n = (n*10) + *f++ - '0'; - if (*f == '.') { - f++; - n = 0; - while (isdigit(Py_CHARMASK(*f))) - n = (n*10) + *f++ - '0'; - } - while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f))) - f++; - /* handle the long flag, but only for %ld. others - can be added when necessary. */ - if (*f == 'l' && *(f+1) == 'd') { - longflag = 1; - ++f; - } - - switch (*f) { - case 'c': - *s++ = va_arg(vargs, int); - break; - case 'd': - if (longflag) - sprintf(s, "%ld", va_arg(vargs, long)); - else - sprintf(s, "%d", va_arg(vargs, int)); - s += strlen(s); - break; - case 'i': - sprintf(s, "%i", va_arg(vargs, int)); - s += strlen(s); - break; - case 'x': - sprintf(s, "%x", va_arg(vargs, int)); - s += strlen(s); - break; - case 's': - p = va_arg(vargs, char*); - i = strlen(p); - if (n > 0 && i > n) - i = n; - memcpy(s, p, i); - s += i; - break; - case 'p': - sprintf(s, "%p", va_arg(vargs, void*)); - /* %p is ill-defined: ensure leading 0x. */ - if (s[1] == 'X') - s[1] = 'x'; - else if (s[1] != 'x') { - memmove(s+2, s, strlen(s)+1); - s[0] = '0'; - s[1] = 'x'; - } - s += strlen(s); - break; - case '%': - *s++ = '%'; - break; - default: - strcpy(s, p); - s += strlen(s); - goto end; - } - } else - *s++ = *f; - } - - end: - _PyString_Resize(&string, s - PyString_AS_STRING(string)); - return string; -} - -PyObject * -PyString_FromFormat(const char *format, ...) -{ - PyObject* ret; - va_list vargs; - -#ifdef HAVE_STDARG_PROTOTYPES - va_start(vargs, format); -#else - va_start(vargs); -#endif - ret = PyString_FromFormatV(format, vargs); - va_end(vargs); - return ret; -} -/* end PyString_FromFormat back-porting code */ -#endif /* PYSTRING_FROMFORMAT_BACKPORT */ - -#endif /* < Python 2.2 */ - diff -Nru simpleparse-2.1.0a1/stt/TextTools/mxTextTools/mxbmse.c simpleparse-2.2.0/stt/TextTools/mxTextTools/mxbmse.c --- simpleparse-2.1.0a1/stt/TextTools/mxTextTools/mxbmse.c 2006-02-18 23:40:03.000000000 +0000 +++ simpleparse-2.2.0/stt/TextTools/mxTextTools/mxbmse.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,218 +0,0 @@ -/* - mxbmse -- Fast Boyer Moore Search Algorithm (Version 0.9) - - The implementation is reentrant and thread safe. While the - general ideas behind the Boyer Moore algorithm are in the public - domain, this implementation falls under the following copyright: - - Copyright (c) 1997-2000, Marc-Andre Lemburg; mailto:mal@lemburg.com - Copyright (c) 2000-2002, eGenix.com Software GmbH; mailto:info@egenix.com - - All Rights Reserved - - See the documentation for copying information or contact the author - (mal@lemburg.com). 
-*/ - -/* to turn on the debugging printfs (DPRINTF):*/ -/* #define MAL_DEBUG */ - -/* Logging file used by debugging facility */ -#ifndef MAL_DEBUG_OUTPUTFILE -# define MAL_DEBUG_OUTPUTFILE "mxTextSearch.log" -#endif - -#ifdef MAL_DEBUG_WITH_PYTHON -# include "mx.h" -#endif - -#include "mxstdlib.h" -#include "mxbmse.h" - -/* --- Fast Boyer-Moore Implementation (8-bit) ---------------------------- */ - -mxbmse_data *bm_init(char *match, - int match_len) -{ - mxbmse_data *c; - int i; - BM_SHIFT_TYPE *shift; - char *m; - - c = newstruct(mxbmse_data); - c->match = match; - c->match_len = match_len; - c->eom = match + match_len - 1; - - /* Length 1 matching does not use a shift table */ - if (match_len == 1) - return c; - - /* Init shift table */ - for ( shift = c->shift, i = 256; i > 0; i--, shift++ ) - *shift = (BM_SHIFT_TYPE) match_len; - - DPRINTF("shift table for match='%s'\n",match); - for ( shift = c->shift, m = match, i = match_len - 1; - i >= 0; - i--, m++ ) { - shift[ (unsigned char) *m ] = (BM_SHIFT_TYPE) i; - DPRINTF(" char = '%c' shift = %i\n", *m, i); - } - - return c; -} - -void bm_free(mxbmse_data *c) -{ - if (c) - free(c); -} - -int bm_search(mxbmse_data *c, - char *text, - int start, - int text_len) -{ - register char *pt; - register char *eot = text + text_len; - - /* Error check */ - if (c == NULL) - return -1; - - /* Init text pointer */ - pt = text + start + c->match_len - 1; - - DPRINTF("Init : %2i %20.20s \t text: %2i %20.20s\n", - c->match_len,c->match,start,text+start); - - if (c->match_len > 1) - for (;;) { - register char *pm; - - pm = c->eom; - - for (;pt < eot && *pt != *pm; - pt += c->shift[(unsigned char) *pt]); - - if (pt >= eot) - break; - - /* First char matches.. what about the others ? */ - { - register int im = c->match_len; - - do { - DPRINTF("=match: %2i '%20.20s' \t text: '%20.20s'\n", - im,pm,pt); - if (--im == 0) - /* Match */ - return pt - text + c->match_len; - pt--; - pm--; - } while (*pt == *pm); - - /* Mismatch after match: use shift-table */ - { - register int a,b; - - a = c->shift[(unsigned char) *pt]; - b = c->match_len - im + 1; - DPRINTF("!match: %2i '%20.20s' \t text: '%20.20s' " - "(sh=%i)\n", - im,pm,pt,max(a,b)); - pt += (a > b) ? a : b; - } - } - - } - - /* Special case: matching string has length 1 */ - else { - register char m = *c->eom; - - for (;pt < eot; pt++) - if (*pt == m) - /* Match */ - return pt - text + 1; - } - - return start; /* no match */ -} - -/* bm search using the translate table -- 45% slower */ - -int bm_tr_search(mxbmse_data *c, - char *text, - int start, - int text_len, - char *tr) -{ - register char *pt; - register char *eot = text + text_len; - - /* Error check */ - if (c == NULL) - return -1; - - /* Init text pointer */ - pt = text + start + c->match_len - 1; - - DPRINTF("Init : %2i '%20.20s' \t text: %2i '%20.20s'\n", - c->match_len,c->match,start,text+start); - - if (c->match_len > 1) - for (;;) { - register char *pm; - - pm = c->eom; - - for (;pt < eot && tr[(unsigned char) *pt] != *pm; - pt += c->shift[(unsigned char) tr[(unsigned char) *pt]]); - - if (pt >= eot) - break; - - /* First char matches.. what about the others ? 
*/ - { - register int im = c->match_len; - - do { - DPRINTF("=match: %2i '%20.20s' \t text: '%20.20s'\n", - im,pm,pt); - if (--im == 0) - /* Match */ - return pt - text + c->match_len; - pt--; - pm--; - } while (tr[(unsigned char) *pt] == *pm); - - /* Mismatch after match: use shift-table */ - { - register int a,b; - - a = c->shift[(unsigned char) tr[(unsigned char) *pt]]; - b = c->match_len - im + 1; - DPRINTF("!match: %2i '%20.20s' \t text: '%20.20s' " - "(sh=%i)\n", - im,pm,pt,max(a,b)); - pt += (a > b)?a:b; - } - } - - } - - /* Special case: matching string has length 1 */ - else { - register char m = *c->eom; - - for (;pt < eot; pt++) - if (*pt == m) - /* Match */ - return pt - text + 1; - } - - return start; /* no match */ -} - diff -Nru simpleparse-2.1.0a1/stt/TextTools/mxTextTools/mxbmse.h simpleparse-2.2.0/stt/TextTools/mxTextTools/mxbmse.h --- simpleparse-2.1.0a1/stt/TextTools/mxTextTools/mxbmse.h 2006-02-18 23:40:03.000000000 +0000 +++ simpleparse-2.2.0/stt/TextTools/mxTextTools/mxbmse.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,65 +0,0 @@ -#ifndef MXBMSE_H -#define MXBMSE_H -/* - mxbmse -- Fast Boyer Moore Search Algorithm (Version 0.8) - - The implementation is reentrant and thread safe. While the - general idea behind the Boyer Moore algorithm are in the public - domain, this implementation falls under the following copyright: - - Copyright (c) 1997-2000, Marc-Andre Lemburg; mailto:mal@lemburg.com - Copyright (c) 2000-2002, eGenix.com Software GmbH; mailto:info@egenix.com - - All Rights Reserved - - See the documentation for copying information or contact the author - (mal@lemburg.com). - -*/ - -#ifdef __cplusplus -extern "C" { -#endif - -/* --- Fast Boyer-Moore Implementation (8-bit) ---------------------------- */ - -/* sanity check switches */ -/*#define SAFER 1*/ - -/* SHIFT must have enough bits to store len(match) - - using 'char' here makes the routines run 15% slower than - with 'int', on the other hand, 'int' is at least 4 times - larger than 'char' -*/ -#ifndef BM_SHIFT_TYPE -# define BM_SHIFT_TYPE int -#endif - -typedef struct { - char *match; - int match_len; - char *eom; - char *pt; - BM_SHIFT_TYPE shift[256]; /* char-based shift table */ -} mxbmse_data; - -extern mxbmse_data *bm_init(char *match, - int match_len); -extern void bm_free(mxbmse_data *c); -extern int bm_search(mxbmse_data *c, - char *text, - int start, - int stop); -extern int bm_tr_search(mxbmse_data *c, - char *text, - int start, - int stop, - char *tr); - -#define BM_MATCH_LEN(bm) ((mxbmse_data *)bm)->match_len - -/* EOF */ -#ifdef __cplusplus -} -#endif -#endif diff -Nru simpleparse-2.1.0a1/stt/TextTools/mxTextTools/mx.h simpleparse-2.2.0/stt/TextTools/mxTextTools/mx.h --- simpleparse-2.1.0a1/stt/TextTools/mxTextTools/mx.h 2006-02-18 23:40:03.000000000 +0000 +++ simpleparse-2.2.0/stt/TextTools/mxTextTools/mx.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,553 +0,0 @@ -#ifndef MX_H -#define MX_H - -/* - mx -- Marc's eXtension modules for Python: basic macros - - This file is only meant to be included by the extension modules. - DO NOT include it in the extension module's header file, since it - will definitely cause troubles then. - - To enable debugging ceratin things, define one of these before - including this file: - - MAL_REF_DEBUG -- debug reference counts (Py_MY_xxx) [this file] - MAL_DEBUG -- enable debug output (DPRINTF) [mxstdlib.h] - MAL_MEM_DEBUG -- enable malloc output (new,cnew,free,...) 
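Editorial note on the deleted mxbmse.c / mxbmse.h above: bm_init builds a 256-entry bad-character shift table (default shift is the pattern length, lowered for characters that occur in the pattern), and bm_search then compares right-to-left at each alignment, returning the index just past the match. A rough Boyer-Moore-Horspool-style sketch of that idea in Python, not a line-for-line port of the C engine (it returns -1 on failure where the C code returns start, and assumes 8-bit/ASCII text):

    def bm_shift_table(pattern):
        # default shift is len(pattern); every pattern char except the
        # last lowers it to its distance from the end of the pattern
        table = [len(pattern)] * 256
        for i, ch in enumerate(pattern[:-1]):
            table[ord(ch)] = len(pattern) - 1 - i
        return table

    def bm_search(text, pattern, start=0):
        m = len(pattern)
        if m == 0:
            return start
        table = bm_shift_table(pattern)
        i = start + m - 1                             # position of last pattern char
        while i < len(text):
            j, k = m - 1, i
            while j >= 0 and text[k] == pattern[j]:   # compare right to left
                j -= 1
                k -= 1
            if j < 0:
                return i + 1                          # index just past the match
            i += table[ord(text[i])]                  # bad-character shift
        return -1                                     # not found

    assert bm_search("fast text processing tools", "text") == 9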
[mxstdlib.h] - - Copyright (c) 2000, Marc-Andre Lemburg; mailto:mal@lemburg.com - Copyright (c) 2000-2002, eGenix.com Software GmbH; mailto:info@egenix.com - See the documentation for further copyright information or contact - the author. - -*/ - -/* --- Platform or compiler specific tweaks ------------------------------- */ - -/* Add some platform specific symbols to enable work-arounds for the - static forward declaration of type definitions; note that the GNU C - compiler does not have this problem. - - Many thanks to all who have contributed to this list. - -*/ -#if (!defined(__GNUC__)) -# if (defined(NeXT) || defined(sgi) || defined(_AIX) || (defined(__osf__) && defined(__DECC)) || defined(TrueCompaq64) || defined(__VMS)) -# define BAD_STATIC_FORWARD -# endif -#endif - -/* Some more tweaks for various platforms. */ - -/* VMS needs this define. Thanks to Jean-François PIÉRONNE */ -#if defined(__VMS) -# define __SC__ -#endif - -/* xlC on AIX doesn't like the Python work-around for static forwards - in ANSI mode (default), so we switch on extended mode. Thanks to - Albert Chin-A-Young */ -#if defined(__xlC__) -# pragma langlvl extended -#endif - -/* --- Standard header files ---------------------------------------------- */ - -/* Include the generic mx header file */ -#include "mxh.h" - -/* Include nearly all Python symbols & definitions */ -#include "Python.h" - -/* Include other standard stuff */ -#include "mxstdlib.h" - -/* Include Python backward compatibility stuff */ -#include "mxpyapi.h" - -/* --- Compiler support --------------------------------------------------- */ - -/* Support for compilers which don't like static forward declarations - of arrays; Python 2.3 removed the support for BAD_STATIC_FORWARD - which is why we now use our own little helpers here. 
*/ -#undef staticforward -#undef statichere -#ifdef BAD_STATIC_FORWARD -# define staticforward extern -# define statichere static -#else -# define staticforward static -# define statichere static -#endif - -/* --- Declare macros ----------------------------------------------------- */ - -#define Py_NONE (Py_INCREF(Py_None),Py_None) - -#ifdef MAL_REF_DEBUG -# define printref(x) printf("* refcount for "#x" = %i\n",(long) x->ob_refcnt); -#else -# define printref(x) -#endif - -/* --- Error handling ----------------------------------------------------- */ - -#define Py_Do(x) {if (!(x)) goto onError;} -#define Py_ReturnOnError(errortype,errorstr) {PyErr_SetString(errortype,errorstr);return NULL;} - -#define Py_Assert(x,errortype,errorstr) {if (!(x)) {PyErr_SetString(errortype,errorstr);goto onError;}} -#define Py_AssertWithArg(x,errortype,errorstr,a1) {if (!(x)) {PyErr_Format(errortype,errorstr,a1);goto onError;}} -#define Py_AssertWith2Args(x,errortype,errorstr,a1,a2) {if (!(x)) {PyErr_Format(errortype,errorstr,a1,a2);goto onError;}} -#define Py_AssertWith3Args(x,errortype,errorstr,a1,a2,a3) {if (!(x)) {PyErr_Format(errortype,errorstr,a1,a2,a3);goto onError;}} - -#define Py_Error(errortype,errorstr) {PyErr_SetString(errortype,errorstr);goto onError;} -#define Py_ErrorWithArg(errortype,errorstr,a1) {PyErr_Format(errortype,errorstr,a1);goto onError;} -#define Py_ErrorWith2Args(errortype,errorstr,a1,a2) {PyErr_Format(errortype,errorstr,a1,a2);goto onError;} -#define Py_ErrorWith3Args(errortype,errorstr,a1,a2,a3) {PyErr_Format(errortype,errorstr,a1,a2,a3);goto onError;} - -/* --- Reference counting ------------------------------------------------- */ - -#ifdef MAL_REF_DEBUG - -static void mx_Py_INCREF(PyObject *v, - char *name, - char *filename, - int lineno) -{ - if (!Py_DebugFlag) { - Py_XINCREF(v); - return; - } - if (!v) - mxDebugPrintf("[%s:%5i] Py_XINCREF( %-8s == NULL );\n", - filename,lineno,name); - else { - Py_INCREF(v);; - mxDebugPrintf("[%s:%5i] Py_XINCREF( %-8s at 0x%x [%s]); " - "new refcount = %i\n", - filename,lineno,name,(int)v,v->ob_type->tp_name, - v->ob_refcnt); - } -} - -static void mx_Py_DECREF(PyObject *v, - char *name, - char *filename, - int lineno) -{ - if (!Py_DebugFlag) { - Py_XDECREF(v); - return; - } - if (!v) - mxDebugPrintf("[%s:%5i] Py_XDECREF( %-8s == NULL );\n", - filename,lineno,name); - else { - int refcnt = v->ob_refcnt; - Py_DECREF(v); - if (refcnt <= 1) - mxDebugPrintf("[%s:%5i] Py_XDECREF( %-8s at 0x%x [%s]); " - "object deleted\n", - filename,lineno,name,(int)v,v->ob_type->tp_name); - else - mxDebugPrintf("[%s:%5i] Py_XDECREF( %-8s at 0x%x [%s]); " - "new refcount = %i\n", - filename,lineno,name,(int)v,v->ob_type->tp_name, - v->ob_refcnt); - } -} - -static void mx_Py_PRINT_REFCOUNT(PyObject *v, - char *name, - char *filename, - int lineno) -{ - if (!v) - mxDebugPrintf("[%s:%5i] Py_PRINT_REFCOUNT( %-8s == NULL );\n", - filename,lineno,name); - else { - mxDebugPrintf("[%s:%5i] Py_PRINT_REFCOUNT( %-8s at 0x%x [%s]) = %i;\n", - filename,lineno,name,(int)v,v->ob_type->tp_name, - v->ob_refcnt); - } -} - -# undef Py_INCREF -# define Py_INCREF(x) mx_Py_INCREF((PyObject *)x,#x,__FILE__,__LINE__) -# undef Py_DECREF -# define Py_DECREF(x) mx_Py_DECREF((PyObject *)x,#x,__FILE__,__LINE__) -# undef Py_XINCREF -# define Py_XINCREF(x) mx_Py_INCREF((PyObject *)x,#x,__FILE__,__LINE__) -# undef Py_XDECREF -# define Py_XDECREF(x) mx_Py_DECREF((PyObject *)x,#x,__FILE__,__LINE__) -# define Py_DELETE(x) {if (x->ob_refcnt > 1) mxDebugPrintf("[%s:%5i] Py_DELETE( "#x" ) WARNING: 
Refcount = %i > 1\n",__FILE__,__LINE__,(int)x->ob_refcnt);Py_DECREF(x);} -# define Py_PRINT_REFCOUNT(x) mx_Py_PRINT_REFCOUNT((PyObject *)x,#x,__FILE__,__LINE__) -#else -# define Py_DELETE(x) Py_DECREF(x) -# define Py_PRINT_REFCOUNT(x) -#endif - -#define Py_DEC_REF(x) {Py_XDECREF(x); x=0;} /* doing this once too often doesn't hurt */ - -/* Unreference a Python object. This is only used in Python debug - builds and needed to keep track of all allocated references. Use in - object constructors or free list implementations. */ -#ifndef _Py_DEC_REFTOTAL -# ifdef Py_REF_DEBUG -# define _Py_DEC_REFTOTAL _Py_RefTotal-- -# else -# define _Py_DEC_REFTOTAL -# endif -#endif -#define mxPy_UNREF(x) _Py_DEC_REFTOTAL - -/* --- Argument passing and checking -------------------------------------- */ - -/* No arguments expected; also use Py_MethodListEntryNoArgs() for this - kind of fct; this check is no longer needed in Python 2.3 and - later */ -#if PY_VERSION_HEX >= 0x02030000 -# define Py_NoArgsCheck() {if (0) goto onError;} -#else -# define Py_NoArgsCheck() {if (!PyArg_NoArgs(args)) goto onError;} -#endif - -/* For functions with old style args (Py_MethodListEntrySingleArg) */ -#define Py_GetArgObject(a) {a = args; if (!a) {PyErr_SetString(PyExc_TypeError,"function/method requires an argument"); goto onError;}} -#define Py_GetSingleArg(format,a1) {if (!PyArg_Parse(args,format,&a1)) goto onError;} - -/* For functions with new style args: */ -#define Py_GetArg(format,a1) {if (!PyArg_ParseTuple(args,format,&a1)) goto onError;} -#define Py_Get2Args(format,a1,a2) {if (!PyArg_ParseTuple(args,format,&a1,&a2)) goto onError;} -#define Py_Get3Args(format,a1,a2,a3) {if (!PyArg_ParseTuple(args,format,&a1,&a2,&a3)) goto onError;} -#define Py_Get4Args(format,a1,a2,a3,a4) {if (!PyArg_ParseTuple(args,format,&a1,&a2,&a3,&a4)) goto onError;} -#define Py_Get5Args(format,a1,a2,a3,a4,a5) {if (!PyArg_ParseTuple(args,format,&a1,&a2,&a3,&a4,&a5)) goto onError;} -#define Py_Get6Args(format,a1,a2,a3,a4,a5,a6) {if (!PyArg_ParseTuple(args,format,&a1,&a2,&a3,&a4,&a5,&a6)) goto onError;} -#define Py_Get7Args(format,a1,a2,a3,a4,a5,a6,a7) {if (!PyArg_ParseTuple(args,format,&a1,&a2,&a3,&a4,&a5,&a6,&a7)) goto onError;} -#define Py_Get8Args(format,a1,a2,a3,a4,a5,a6,a7,a8) {if (!PyArg_ParseTuple(args,format,&a1,&a2,&a3,&a4,&a5,&a6,&a7,&a8)) goto onError;} - -/* For functions with keywords -- the first macro parameter must be - the keywords array given as e.g. - - static char *keywords[] = {"first","second","third", 0}; - - with an entry for every argument (in the correct order). The - functions must be included in the method list using - Py_MethodWithKeywordsListEntry() and be declared as - Py_C_Function_WithKeywords(). 
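 For illustration only (not part of the original header): a minimal sketch of how the explicit-keyword-array macros described above combine with Py_C_Function_WithKeywords() and Py_MethodWithKeywordsListEntry(); the function name mxExample_find and its parameters are hypothetical.

 Py_C_Function_WithKeywords(
     mxExample_find,
     "find(text,start=0)\n\nHypothetical example; returns the start value."
     )
 {
     char *text;
     int start = 0;
     static char *keywords[] = {"text", "start", 0};

     Py_KeywordGet2Args(keywords, "s|i", text, start);

     /* ... real work on text would go here ... */
     Py_ReturnArg("i", start);

  onError:
     return NULL;
 }

 /* registered in the module's method table as:
    Py_MethodWithKeywordsListEntry("find", mxExample_find) */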
- -*/ -#define Py_KeywordGetArg(keywords,format,a1) {if (!PyArg_ParseTupleAndKeywords(args,kws,format,keywords,&a1)) goto onError;} -#define Py_KeywordGet2Args(keywords,format,a1,a2) {if (!PyArg_ParseTupleAndKeywords(args,kws,format,keywords,&a1,&a2)) goto onError;} -#define Py_KeywordGet3Args(keywords,format,a1,a2,a3) {if (!PyArg_ParseTupleAndKeywords(args,kws,format,keywords,&a1,&a2,&a3)) goto onError;} -#define Py_KeywordGet4Args(keywords,format,a1,a2,a3,a4) {if (!PyArg_ParseTupleAndKeywords(args,kws,format,keywords,&a1,&a2,&a3,&a4)) goto onError;} -#define Py_KeywordGet5Args(keywords,format,a1,a2,a3,a4,a5) {if (!PyArg_ParseTupleAndKeywords(args,kws,format,keywords,&a1,&a2,&a3,&a4,&a5)) goto onError;} -#define Py_KeywordGet6Args(keywords,format,a1,a2,a3,a4,a5,a6) {if (!PyArg_ParseTupleAndKeywords(args,kws,format,keywords,&a1,&a2,&a3,&a4,&a5,&a6)) goto onError;} -#define Py_KeywordGet7Args(keywords,format,a1,a2,a3,a4,a5,a6,a7) {if (!PyArg_ParseTupleAndKeywords(args,kws,format,keywords,&a1,&a2,&a3,&a4,&a5,&a6,&a7)) goto onError;} -#define Py_KeywordGet8Args(keywords,format,a1,a2,a3,a4,a5,a6,a7,a8) {if (!PyArg_ParseTupleAndKeywords(args,kws,format,keywords,&a1,&a2,&a3,&a4,&a5,&a6,&a7,&a8)) goto onError;} - -/* New style macros fof functions supporting keywords -- the C - variable names are used as template for the keyword list, i.e. they - must match the Python keyword parameter names. - - Note that format strings with special parameters (e.g. "#s") are - not allowed since they would cause the keyword list to be out of - sync. - - The functions must be included in the method list using - Py_MethodWithKeywordsListEntry() and be declared as - Py_C_Function_WithKeywords(). - - Example: - - Py_C_Function_WithKeywords( - myfunction, - "myfunction(filename,dupkeys=0,filemode=0,sectorsize=512)\n\n" - "Returns a myobject" - ) - { - char *filename; - int sectorsize = 512; - int dupkeys = 0; - int filemode = 0; - - Py_KeywordsGet4Args("s|iii", - filename,dupkeys,filemode,sectorsize); - - return (PyObject *)myobject_New(filename, - filemode, - sectorsize, - dupkeys); - onError: - return NULL; - } - -*/ -#define Py_KeywordsGetArg(format,a1) {static char *kwslist[] = {#a1,NULL}; if (!PyArg_ParseTupleAndKeywords(args,kws,format,kwslist,&a1)) goto onError;} -#define Py_KeywordsGet2Args(format,a1,a2) {static char *kwslist[] = {#a1,#a2,NULL}; if (!PyArg_ParseTupleAndKeywords(args,kws,format,kwslist,&a1,&a2)) goto onError;} -#define Py_KeywordsGet3Args(format,a1,a2,a3) {static char *kwslist[] = {#a1,#a2,#a3,NULL}; if (!PyArg_ParseTupleAndKeywords(args,kws,format,kwslist,&a1,&a2,&a3)) goto onError;} -#define Py_KeywordsGet4Args(format,a1,a2,a3,a4) {static char *kwslist[] = {#a1,#a2,#a3,#a4,NULL}; if (!PyArg_ParseTupleAndKeywords(args,kws,format,kwslist,&a1,&a2,&a3,&a4)) goto onError;} -#define Py_KeywordsGet5Args(format,a1,a2,a3,a4,a5) {static char *kwslist[] = {#a1,#a2,#a3,#a4,#a5,NULL}; if (!PyArg_ParseTupleAndKeywords(args,kws,format,kwslist,&a1,&a2,&a3,&a4,&a5)) goto onError;} -#define Py_KeywordsGet6Args(format,a1,a2,a3,a4,a5,a6) {static char *kwslist[] = {#a1,#a2,#a3,#a4,#a5,#a6,NULL}; if (!PyArg_ParseTupleAndKeywords(args,kws,format,kwslist,&a1,&a2,&a3,&a4,&a5,&a6)) goto onError;} -#define Py_KeywordsGet7Args(format,a1,a2,a3,a4,a5,a6,a7) {static char *kwslist[] = {#a1,#a2,#a3,#a4,#a5,#a6,#a7,NULL}; if (!PyArg_ParseTupleAndKeywords(args,kws,format,kwslist,&a1,&a2,&a3,&a4,&a5,&a6,&a7)) goto onError;} -#define Py_KeywordsGet8Args(format,a1,a2,a3,a4,a5,a6,a7,a8) {static char *kwslist[] = 
{#a1,#a2,#a3,#a4,#a5,#a6,#a7,#a8,NULL}; if (!PyArg_ParseTupleAndKeywords(args,kws,format,kwslist,&a1,&a2,&a3,&a4,&a5,&a6,&a7,&a8)) goto onError;} - -/* --- Returning values to Python ----------------------------------------- */ - -/* XXX Don't always work: every time you have an 'O' in the BuildValue format - string, you need to DECREF the variable *after* the tuple has been - built !!! -*/ - -#define Py_ReturnNone() {Py_INCREF(Py_None);return Py_None;} -#define Py_ReturnTrue() {Py_INCREF(Py_True);return Py_True;} -#define Py_ReturnFalse() {Py_INCREF(Py_False);return Py_False;} -#define Py_ReturnArg(format,a1) return Py_BuildValue(format,a1); -#define Py_Return Py_ReturnArg -#define Py_Return2Args(format,a1,a2) return Py_BuildValue(format,a1,a2); -#define Py_Return2 Py_Return2Args -#define Py_Return3Args(format,a1,a2,a3) return Py_BuildValue(format,a1,a2,a3); -#define Py_Return3 Py_Return3Args -#define Py_Return4Args(format,a1,a2,a3) return Py_BuildValue(format,a1,a2,a3,a4); -#define Py_Return5Args(format,a1,a2,a3) return Py_BuildValue(format,a1,a2,a3,a4,a5); -#define Py_Return6Args(format,a1,a2,a3) return Py_BuildValue(format,a1,a2,a3,a4,a5,a6); -#define Py_Return7Args(format,a1,a2,a3) return Py_BuildValue(format,a1,a2,a3,a4,a5,a6,a7); - -/* Build values */ - -#define Py_BuildNone() Py_NONE -#define Py_Build(format,x) Py_BuildValue(format,x) -#define Py_Build2(format,x,y) Py_BuildValue(format,x,y) -#define Py_Build3(format,x,y,z) Py_BuildValue(format,x,y,z) - -/* --- Declaring Python builtin functions/methods ------------------------- */ - -/* Declare C function/method fct, having docstring docstr; may use vargargs */ -#define Py_C_Function(fct,docstr) \ - static char fct##_docstring[] = docstr;\ - static PyObject *fct(PyObject *self, PyObject *args) - -/* Declare C function/method fct, having keywords keywordsarray and a - docstring docstr; may use vargargs & keywords */ -#define Py_C_Function_WithKeywords(fct,docstr) \ - static char fct##_docstring[] = docstr;\ - static PyObject *fct(PyObject *self, PyObject *args, PyObject *kws) - -/* These declare: self -- instance pointer for methods, NULL for functions - args -- argument tuple - kws -- keywords dict (if applicable) - plus as statics: - _docstring -- the docstring as given - _keywords -- the keyword array as given - - Note: use the Py_GetArg macros for functions without keywords, - and Py_KeywordGetArg macros for functions with keywords -*/ - -/* --- Method list entries for builtin functions/methods ------------------ */ - -/* Add a C function/method cname to the module dict as pyname; no - doc-string */ -#define Py_MethodListEntryAny(pyname,cname) {pyname,(PyCFunction)cname,METH_VARARGS} - -/* Add a C function/method cname to the module dict as pyname; the - function can use varargs */ -#define Py_MethodListEntry(pyname,cname) {pyname,(PyCFunction)cname,METH_VARARGS,cname##_docstring} - -/* Add a C function/method cname to the module dict as pyname; the - function takes no args; in Python 2.3 a new flag was added for - these which implements the no args check in the interpreter - itself. */ -#ifdef METH_NOARGS -# define Py_MethodListEntryNoArgs(pyname,cname) {pyname,(PyCFunction)cname,METH_NOARGS,cname##_docstring} -#else -# define Py_MethodListEntryNoArgs(pyname,cname) {pyname,(PyCFunction)cname,0,cname##_docstring} -#endif - -/* Add a C function/method cname to the module dict as pyname; the - function takes one argument: the object is passed in directly - (without wrapping it into a tuple first), i.e. 
don't use - the Py_GetArg-macros or PyArg_ParseTuple(). */ -#define Py_MethodListEntrySingleArg(pyname,cname) {pyname,(PyCFunction)cname,0,cname##_docstring} - -/* Add a C function/method that uses keywords to the module dict */ -#define Py_MethodWithKeywordsListEntry(pyname,cname) {pyname,(PyCFunction)cname,METH_VARARGS | METH_KEYWORDS,cname##_docstring} - - -/* --- Sequence slicing --------------------------------------------------- */ - -/* Check a given slice and apply the usual rules for negative indices */ -#define Py_CheckSequenceSlice(len,start,stop) { \ - if (stop > len) \ - stop = len; \ - else { \ - if (stop < 0) \ - stop += len; \ - if (stop < 0) \ - stop = 0; \ - } \ - if (start < 0) { \ - start += len; \ - if (start < 0) \ - start = 0; \ - } \ - if (stop < start) \ - start = stop; \ - } - -/* --- Number macros ------------------------------------------------------ */ - -/* Test for PyFloat_AsDouble() compatible object */ -#define PyFloat_Compatible(obj) \ - (obj->ob_type->tp_as_number->nb_float != NULL) - -/* --- Text macros -------------------------------------------------------- */ - -/* Check a given text slice and apply the usual rules for negative - indices */ -#define Py_CheckBufferSlice(textlen,start,stop) \ - Py_CheckSequenceSlice(textlen,start,stop) - -/* Dito for string objects */ -#define Py_CheckStringSlice(textobj,start,stop) \ - Py_CheckSequenceSlice(PyString_GET_SIZE(textobj),start,stop) - -/* For b/w compatibility */ -#define Py_CheckSlice(textobj,start,stop) \ - Py_CheckStringSlice(textobj,start,stop) - -/* Dito for Unicode objects */ -#ifdef PyUnicode_GET_SIZE -# define Py_CheckUnicodeSlice(unicode,start,stop) \ - Py_CheckSequenceSlice(PyUnicode_GET_SIZE(unicode),start,stop) -#endif - -/* This assumes that fixed is a constant char array; the strcmp - function is only called in case the attribute name length exceeds - 10 characters and the first 10 characters match; optimizing - compilers should eliminate any unused parts of this comparison - automatically. - - Note: The latest egcs compiler warns about the subscripts being out - of range for shorter fixed strings; since no code is generated for - those comparisons, these warning can safely be ignored. Still, they - are annoying. See the Py_StringsCompareEqual() macro below for a - way to work around this. - -*/ -#define Py_StringsCompareEqualEx(var,fixed,fixedsize) \ - (var[0] == fixed[0] && \ - (fixed[0] == 0 || \ - (fixedsize >= 1 && (var[1] == fixed[1] && \ - (fixed[1] == 0 || \ - (fixedsize >= 2 && (var[2] == fixed[2] && \ - (fixed[2] == 0 || \ - (fixedsize >= 3 && (var[3] == fixed[3] && \ - (fixed[3] == 0 || \ - (fixedsize >= 4 && (var[4] == fixed[4] && \ - (fixed[4] == 0 || \ - (fixedsize >= 5 && (var[5] == fixed[5] && \ - (fixed[5] == 0 || \ - (fixedsize >= 6 && (var[6] == fixed[6] && \ - (fixed[6] == 0 || \ - (fixedsize >= 7 && (var[7] == fixed[7] && \ - (fixed[7] == 0 || \ - (fixedsize >= 8 && (var[8] == fixed[8] && \ - (fixed[8] == 0 || \ - (fixedsize >= 9 && (var[9] == fixed[9] && \ - (fixed[9] == 0 || \ - (fixedsize >= 10 && \ - strcmp(&var[10],&fixed[10]) == 0 \ - )))))))))))))))))))))))))))))) - -/* This assumes that fixed is a constant char array. - - The appended string snippet is to shut up the warnings produced by - newer egcs/gcc compilers about offsets being outside bounds. - - Note that some compilers do the inlining by themselves or don't - like the above trick (OpenVMS is one such platform). For these we - simply use the standard way. 
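 For illustration only (not part of mx.h): a small sketch of the Py_StringsCompareEqual() wrapper defined just below being used for attribute-name dispatch in an old-style getattr handler; my_thing and its size field are hypothetical.

 typedef struct { PyObject_HEAD int size; } my_thing;

 static PyObject *my_thing_getattr(my_thing *self, char *name)
 {
     /* expands to the inlined character-by-character comparison above */
     if (Py_StringsCompareEqual(name, "size"))
         return PyInt_FromLong((long)self->size);
     PyErr_SetString(PyExc_AttributeError, name);
     return NULL;
 }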
- -*/ - -#ifndef __VMS -# define Py_StringsCompareEqual(var,fixed) \ - Py_StringsCompareEqualEx(var,fixed"\0\0\0\0\0\0\0\0\0\0",sizeof(fixed)) -#else -# define Py_StringsCompareEqual(var,fixed) (strcmp(var, fixed) == 0) -#endif - -/* Fast character set member check; set must be a "static unsigned - *char set" array of exactly 32 bytes length generated with - TextTools.set() */ -#define Py_CharInSet(chr,set) \ - (((unsigned char)(set)[(unsigned char)(chr) >> 3] & \ - (1 << ((unsigned char)(chr) & 7))) != 0) - -/* --- Macros for getattr ------------------------------------------------- */ - -/* Compares var to name and returns 1 iff they match. - - This assumes that name is a constant char array. */ - -#define Py_WantAttr(var,name) Py_StringsCompareEqual(var,name) - -/* --- Module init helpers ------------------------------------------------ */ - -/* Helper for startup type object initialization */ - -#define PyType_Init(x) \ -{ \ - x.ob_type = &PyType_Type; \ - Py_Assert(x.tp_basicsize >= (int)sizeof(PyObject), \ - PyExc_SystemError, \ - "Internal error: tp_basicsize of "#x" too small");\ -} - -/* Error reporting for module init functions */ - -#define Py_ReportModuleInitError(modname) { \ - PyObject *exc_type, *exc_value, *exc_tb; \ - PyObject *str_type, *str_value; \ - \ - /* Fetch error objects and convert them to strings */ \ - PyErr_Fetch(&exc_type, &exc_value, &exc_tb); \ - if (exc_type && exc_value) { \ - str_type = PyObject_Str(exc_type); \ - str_value = PyObject_Str(exc_value); \ - } \ - else { \ - str_type = NULL; \ - str_value = NULL; \ - } \ - /* Try to format a more informative error message using the \ - original error */ \ - if (str_type && str_value && \ - PyString_Check(str_type) && PyString_Check(str_value)) \ - PyErr_Format( \ - PyExc_ImportError, \ - "initialization of module "modname" failed " \ - "(%s:%s)", \ - PyString_AS_STRING(str_type), \ - PyString_AS_STRING(str_value)); \ - else \ - PyErr_SetString( \ - PyExc_ImportError, \ - "initialization of module "modname" failed"); \ - Py_XDECREF(str_type); \ - Py_XDECREF(str_value); \ - Py_XDECREF(exc_type); \ - Py_XDECREF(exc_value); \ - Py_XDECREF(exc_tb); \ -} - -/* --- SWIG addons -------------------------------------------------------- */ - -/* Throw this error after having set the correct Python exception - using e.g. PyErr_SetString(); */ -#define mxSWIGError "mxSWIGError" - -/* EOF */ -#endif - diff -Nru simpleparse-2.1.0a1/stt/TextTools/mxTextTools/mxh.h simpleparse-2.2.0/stt/TextTools/mxTextTools/mxh.h --- simpleparse-2.1.0a1/stt/TextTools/mxTextTools/mxh.h 2006-02-18 23:40:03.000000000 +0000 +++ simpleparse-2.2.0/stt/TextTools/mxTextTools/mxh.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,60 +0,0 @@ -#ifndef MXH_H -#define MXH_H - -/* - mxh.h -- Generic header file for all mx Extenstions - - This file should be included by every mx Extension header file - and the C file. - - Copyright (c) 2000, Marc-Andre Lemburg; mailto:mal@lemburg.com - Copyright (c) 2000-2002, eGenix.com Software GmbH; mailto:info@egenix.com - See the documentation for further copyright information or contact - the author. - -*/ - -/* - Macros to control export and import of DLL symbols. 
- - We use our own definitions since Python's don't allow specifying - both imported and exported symbols at the same time; these defines - haven't been thoroughly tested yet, patches are most welcome :-) - -*/ - -/* Macro to "mark" a symbol for DLL export */ - -#if (defined(_MSC_VER) && _MSC_VER > 850 \ - || defined(__MINGW32__) || defined(__CYGWIN) || defined(__BEOS__)) -# ifdef __cplusplus -# define MX_EXPORT(type) extern "C" type __declspec(dllexport) -# else -# define MX_EXPORT(type) extern type __declspec(dllexport) -# endif -#elif defined(__WATCOMC__) -# define MX_EXPORT(type) extern type __export -#elif defined(__IBMC__) -# define MX_EXPORT(type) extern type _Export -#else -# define MX_EXPORT(type) extern type -#endif - -/* Macro to "mark" a symbol for DLL import */ - -#if defined(__BORLANDC__) -# define MX_IMPORT(type) extern type __import -#elif (defined(_MSC_VER) && _MSC_VER > 850 \ - || defined(__MINGW32__) || defined(__CYGWIN) || defined(__BEOS__)) -# ifdef __cplusplus -# define MX_IMPORT(type) extern "C" type __declspec(dllimport) -# else -# define MX_IMPORT(type) extern type __declspec(dllimport) -# endif - -#else -# define MX_IMPORT(type) extern type -#endif - -/* EOF */ -#endif diff -Nru simpleparse-2.1.0a1/stt/TextTools/mxTextTools/mxpyapi.h simpleparse-2.2.0/stt/TextTools/mxTextTools/mxpyapi.h --- simpleparse-2.1.0a1/stt/TextTools/mxTextTools/mxpyapi.h 2006-02-18 23:40:03.000000000 +0000 +++ simpleparse-2.2.0/stt/TextTools/mxTextTools/mxpyapi.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,468 +0,0 @@ -#ifndef MXPYAPI_H -#define MXPYAPI_H - -/* mxpyapi.h - - This header file includes some new APIs that are not available in - older API versions, yet are used by the mx-Extensions. - - Copyright (c) 2000, Marc-Andre Lemburg; mailto:mal@lemburg.com - Copyright (c) 2000-2003, eGenix.com Software GmbH; mailto:info@egenix.com - -*/ - -/* Emulate PY_VERSION_HEX for older Python versions. */ - -#ifndef PY_VERSION_HEX -# if PYTHON_API_VERSION == 1007 -# define PY_VERSION_HEX 0x010500F0 -# endif -# if PYTHON_API_VERSION == 1006 -# define PY_VERSION_HEX 0x010400F0 -# endif -# if PYTHON_API_VERSION < 1006 -# define PY_VERSION_HEX 0 -# endif -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -/* - ---------------------------------------------------------------- - Python 2.3 and above: - ---------------------------------------------------------------- -*/ - -#if PY_VERSION_HEX >= 0x02030000 - -#else - -/* These were introduced in Python 2.3: */ -# define PY_LONG_LONG LONG_LONG - -#endif - -/* - ---------------------------------------------------------------- - Python 2.2 and above: - ---------------------------------------------------------------- -*/ - -#if PY_VERSION_HEX >= 0x02020000 - -# define HAVE_SUBCLASSABLE_TYPES 1 - -#else - -/* These were introduced in Python 2.2: */ -# define PyString_CheckExact PyString_Check -# define PyInt_CheckExact PyInt_Check -# define PyFloat_CheckExact PyFloat_Check -# define PyLong_CheckExact PyLong_Check - -#endif - -/* - ---------------------------------------------------------------- - Python 1.6 and 2.0 only: - ---------------------------------------------------------------- -*/ - -/* Disabled: we don't support Python 1.6 and don't have a need for - this API in any of the mx Tools (yet). 
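 Purely illustrative (not part of mxpyapi.h): a tiny sketch of how extension code is expected to lean on the version-compatibility defines earlier in this header, with the fallbacks silently taking over on interpreters older than 2.2; check_exact_string() is a hypothetical helper.

 static int check_exact_string(PyObject *v)
 {
     /* the real PyString_CheckExact() on Python >= 2.2; the #define
        above maps it to plain PyString_Check() on older versions */
     return PyString_CheckExact(v);
 }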
*/ -#if 0 -#if PY_VERSION_HEX >= 0x01060000 && PY_VERSION_HEX < 0x02010000 - -static PyObject * -PyObject_Unicode(PyObject *v) -{ - PyObject *res; - - if (v == NULL) - res = PyString_FromString(""); - else if (PyUnicode_Check(v)) { - Py_INCREF(v); - return v; - } - else if (PyString_Check(v)) - res = v; - else if (v->ob_type->tp_str != NULL) - res = (*v->ob_type->tp_str)(v); - else { - PyObject *func; - if (!PyInstance_Check(v) || - (func = PyObject_GetAttrString(v, "__str__")) == NULL) { - PyErr_Clear(); - res = PyObject_Repr(v); - } - else { - res = PyEval_CallObject(func, (PyObject *)NULL); - Py_DECREF(func); - } - } - if (res == NULL) - return NULL; - if (!PyUnicode_Check(res)) { - PyObject* str; - str = PyUnicode_FromObject(res); - Py_DECREF(res); - if (str) - res = str; - else - return NULL; - } - return res; -} - -#endif -#endif - -/* - ---------------------------------------------------------------- - Python 2.0 alpha + betas: - ---------------------------------------------------------------- -*/ - -#if PY_VERSION_HEX >= 0x02000000 && PY_VERSION_HEX < 0x020000F0 - -# if defined(PyBuffer_Check) && !defined(HAVE_PYTHON_BUFFEROBJECTS) -# define HAVE_PYTHON_BUFFEROBJECTS -# endif - -#ifndef Py_PROTO -# define Py_PROTO(args) args -#endif - -#endif - -/* - ---------------------------------------------------------------- - Python 1.6 and later: - ---------------------------------------------------------------- -*/ - -#if PY_VERSION_HEX >= 0x01060000 - -#if defined(PyUnicode_Check) && !defined(HAVE_UNICODE) -# define HAVE_UNICODE -#endif - -#endif - -#if PY_VERSION_HEX >= 0x02020000 - -#if defined(HAVE_UNICODE) && !defined(Py_USING_UNICODE) -# undef HAVE_UNICODE -#endif - -#endif - -#ifndef HAVE_UNICODE -# undef PyUnicode_Check -# define PyUnicode_Check(obj) 0 -#endif - -/* - ---------------------------------------------------------------- - Python < 1.6: - ---------------------------------------------------------------- -*/ - -#if PY_VERSION_HEX < 0x01060000 - -#if !defined(PyObject_DEL) -# define PyObject_DEL(x) free(x) -# define PyObject_Del(x) free(x) -#endif - -#endif - -/* - ---------------------------------------------------------------- - Python >= 1.5.2: - ---------------------------------------------------------------- -*/ - -#if PY_VERSION_HEX >= 0x010502F0 - -# if defined(PyBuffer_Check) && !defined(HAVE_PYTHON_BUFFEROBJECTS) -# define HAVE_PYTHON_BUFFEROBJECTS -# endif - -#endif - -/* - ---------------------------------------------------------------- - Python >= 1.5.2 and prior to 2.0a1 - ---------------------------------------------------------------- -*/ - -#if PY_VERSION_HEX >= 0x010502F0 && PY_VERSION_HEX < 0x02000000 - -/* Takes an arbitrary object which must support the (character, single - segment) buffer interface and returns a pointer to a read-only - memory location useable as character based input for subsequent - processing. - - buffer and buffer_len are only set in case no error - occurrs. Otherwise, -1 is returned and an exception set. 
- -*/ - -static -int PyObject_AsCharBuffer(PyObject *obj, - const char **buffer, - int *buffer_len) -{ - PyBufferProcs *pb = obj->ob_type->tp_as_buffer; - const char *pp; - int len; - - if ( pb == NULL || - pb->bf_getcharbuffer == NULL || - pb->bf_getsegcount == NULL ) { - PyErr_SetString(PyExc_TypeError, - "expected a character buffer object"); - goto onError; - } - if ( (*pb->bf_getsegcount)(obj,NULL) != 1 ) { - PyErr_SetString(PyExc_TypeError, - "expected a single-segment buffer object"); - goto onError; - } - len = (*pb->bf_getcharbuffer)(obj,0,&pp); - if (len < 0) - goto onError; - *buffer = pp; - *buffer_len = len; - return 0; - - onError: - return -1; -} - -/* Same as PyObject_AsCharBuffer() except that this API expects - (readable, single segment) buffer interface and returns a pointer - to a read-only memory location which can contain arbitrary data. - - buffer and buffer_len are only set in case no error - occurrs. Otherwise, -1 is returned and an exception set. - -*/ - -static -int PyObject_AsReadBuffer(PyObject *obj, - const void **buffer, - int *buffer_len) -{ - PyBufferProcs *pb = obj->ob_type->tp_as_buffer; - void *pp; - int len; - - if ( pb == NULL || - pb->bf_getreadbuffer == NULL || - pb->bf_getsegcount == NULL ) { - PyErr_SetString(PyExc_TypeError, - "expected a readable buffer object"); - goto onError; - } - if ( (*pb->bf_getsegcount)(obj,NULL) != 1 ) { - PyErr_SetString(PyExc_TypeError, - "expected a single-segment buffer object"); - goto onError; - } - len = (*pb->bf_getreadbuffer)(obj,0,&pp); - if (len < 0) - goto onError; - *buffer = pp; - *buffer_len = len; - return 0; - - onError: - return -1; -} - -/* Takes an arbitrary object which must support the (writeable, single - segment) buffer interface and returns a pointer to a writeable - memory location in buffer of size buffer_len. - - buffer and buffer_len are only set in case no error - occurrs. Otherwise, -1 is returned and an exception set. - -*/ - -static -int PyObject_AsWriteBuffer(PyObject *obj, - void **buffer, - int *buffer_len) -{ - PyBufferProcs *pb = obj->ob_type->tp_as_buffer; - void*pp; - int len; - - if ( pb == NULL || - pb->bf_getwritebuffer == NULL || - pb->bf_getsegcount == NULL ) { - PyErr_SetString(PyExc_TypeError, - "expected a writeable buffer object"); - goto onError; - } - if ( (*pb->bf_getsegcount)(obj,NULL) != 1 ) { - PyErr_SetString(PyExc_TypeError, - "expected a single-segment buffer object"); - goto onError; - } - len = (*pb->bf_getwritebuffer)(obj,0,&pp); - if (len < 0) - goto onError; - *buffer = pp; - *buffer_len = len; - return 0; - - onError: - return -1; -} - -#endif /* Python Version in [1.5.2b2, 2.0) */ - -/* - ---------------------------------------------------------------- - Python 1.5.2b1 and older: - ---------------------------------------------------------------- -*/ - -#if PY_VERSION_HEX <= 0x010502B1 - -/* These are missing from PC/python_nt.def and thus didn't get included - in python1.5.lib on Windows platforms. 
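 Illustration only (not part of the original header): a minimal usage sketch of the PyObject_AsCharBuffer() fallback defined above, mirroring its old int-based signature; print_object_text() is hypothetical and assumes <stdio.h>.

 #include <stdio.h>

 static int print_object_text(PyObject *obj)
 {
     const char *buffer;
     int buffer_len;

     if (PyObject_AsCharBuffer(obj, &buffer, &buffer_len) < 0)
         return -1;                /* TypeError is already set for us */
     printf("%.*s\n", buffer_len, buffer);
     return 0;
 }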
*/ -#ifdef MS_WIN32 -# define PyString_InternInPlace(x) -# define PyString_InternFromString(x) PyString_FromString(x) -#endif - -#endif /* Python Version <= 1.5.2b1 */ - -/* - ---------------------------------------------------------------- - Python 1.5.2a2 and older: - ---------------------------------------------------------------- -*/ - -#if PY_VERSION_HEX <= 0x010502A2 - -extern long PyOS_strtol Py_PROTO((const char *, char **, int)); - -#endif /* Python Version <= 1.5.2a2 */ - -/* - ---------------------------------------------------------------- - Python 1.5.2a1 and older: - ---------------------------------------------------------------- -*/ - -#if PY_VERSION_HEX <= 0x010502A1 - -#ifndef PyList_SET_ITEM -# define PyList_SET_ITEM(l,i,w) PyList_GET_ITEM(l,i) = w -#endif - -#endif /* Python Version < 1.5.2a1 */ - -/* - ---------------------------------------------------------------- - Prior to Python 1.5: - ---------------------------------------------------------------- -*/ - -#if PY_VERSION_HEX < 0x010500F0 - -/* New in Python1.5: */ -# undef PyString_AS_STRING -# define PyString_AS_STRING(op) (((PyStringObject *)(op))->ob_sval) -# define PyString_GET_SIZE(op) (((PyStringObject *)(op))->ob_size) -# define PyTuple_GET_SIZE(op) (((PyTupleObject *)(op))->ob_size) -# define PyList_GET_SIZE(op) (((PyListObject *)(op))->ob_size) - -/* Changed since 1.4 */ -# undef PyList_GET_ITEM -# define PyList_GET_ITEM(op, i) (((PyListObject *)(op))->ob_item[i]) -# undef PyInt_AS_LONG -# define PyInt_AS_LONG(op) (((PyIntObject *)(op))->ob_ival) -# undef PyFloat_AS_DOUBLE -# define PyFloat_AS_DOUBLE(op) (((PyFloatObject *)(op))->ob_fval) - -/* This function is taken from error.c in Python 1.5... - - Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam, The - Netherlands. - - All Rights Reserved - - Permission to use, copy, modify, and distribute this software and its - documentation for any purpose and without fee is hereby granted, provided - that the above copyright notice appear in all copies and that both that - copyright notice and this permission notice appear in supporting - documentation, and that the names of Stichting Mathematisch Centrum or - CWI or Corporation for National Research Initiatives or CNRI not be used - in advertising or publicity pertaining to distribution of the software - without specific, written prior permission. - - While CWI is the initial source for this software, a modified version is - made available by the Corporation for National Research Initiatives - (CNRI) at the Internet address ftp://ftp.python.org. - - STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH - REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF - MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH - CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL - DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR - PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS - ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF - THIS SOFTWARE. - -*/ - -#ifdef HAVE_STDARG_PROTOTYPES -PyObject * -PyErr_Format(PyObject *exception, const char *format, ...) 
-#else -PyObject * -PyErr_Format(exception, format, va_alist) - PyObject *exception; - const char *format; - va_dcl -#endif -{ - va_list vargs; - char buffer[500]; /* Caller is responsible for limiting the format */ - -#ifdef HAVE_STDARG_PROTOTYPES - va_start(vargs, format); -#else - va_start(vargs); -#endif - - vsprintf(buffer, format, vargs); - PyErr_SetString(exception, buffer); - return NULL; -} - -/* Python 1.5 uses instances as exceptions, the 1.4 API only knows - about strings. */ -#define PyErr_NewException(name,base,dict) PyString_FromString(fullname); - -/* Missing from rename2.h in Python 1.4 */ -#ifndef PyVarObject -# define PyVarObject varobject -#endif - -#endif /* Python Version < 1.5 */ - -#ifdef __cplusplus -} -#endif - -/* EOF */ -#endif diff -Nru simpleparse-2.1.0a1/stt/TextTools/mxTextTools/mxstdlib.h simpleparse-2.2.0/stt/TextTools/mxTextTools/mxstdlib.h --- simpleparse-2.1.0a1/stt/TextTools/mxTextTools/mxstdlib.h 2006-02-18 23:40:03.000000000 +0000 +++ simpleparse-2.2.0/stt/TextTools/mxTextTools/mxstdlib.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,254 +0,0 @@ -#ifndef MXSTDLIB_H -#define MXSTDLIB_H - -/* Standard stuff I use often -- not Python specific - - Copyright (c) 2000, Marc-Andre Lemburg; mailto:mal@lemburg.com - Copyright (c) 2000-2002, eGenix.com Software GmbH; mailto:info@egenix.com - See the documentation for further copyright information or contact - the author. - - */ - -#include -#include -#include -#include -#include -#include -#ifdef HAVE_LIMITS_H -#include -#else -#ifndef INT_MAX -# define INT_MAX 2147483647 -#endif -#ifndef LONG_MAX -# define LONG_MAX INT_MAX -#endif -#endif - -/* --- My own macros for memory allocation... --------------------------- */ - -#ifdef MAL_MEM_DEBUG -# define newstruct(x) \ - (mxDebugPrintf("* malloc for struct "#x" (%s:%i)\n",__FILE__,__LINE__),\ - (x *)malloc(sizeof(x))) -# define cnewstruct(x) \ - (mxDebugPrintf("* calloc for struct "#x" (%s:%i)\n",c,__FILE__,__LINE__),\ - (x *)calloc(sizeof(x),1)) -# define new(x,c) \ - (mxDebugPrintf("* malloc for "#c"=%i '"#x"'s (%s:%i)\n",c,__FILE__,__LINE__),\ - (x *)malloc(sizeof(x)*(c))) -# define cnew(x,c) \ - (mxDebugPrintf("* calloc for "#c"=%i '"#x"'s (%s:%i)\n",c,__FILE__,__LINE__),\ - (x *)calloc((c),sizeof(x))) -# define resize(var,x,c) \ - (mxDebugPrintf("* realloc array "#var" ("#x") at %X to size "#c"=%i (%s:%i)\n",var,c,__FILE__,__LINE__),\ - (x *)realloc((void*)(var),sizeof(x)*(c))) -# define varresize(var,x,bytes) \ - (mxDebugPrintf("* realloc var "#var" ("#x") at %X to %i bytes (%s:%i)\n",var,bytes,__FILE__,__LINE__),\ - (x *)realloc((void*)(var),(bytes))) -# define free(x) \ - (mxDebugPrintf("* freeing "#x" at %X (%s:%i)\n",x,__FILE__,__LINE__),\ - free((void*)(x))) -#else -# define newstruct(x) ((x *)malloc(sizeof(x))) -# define cnewstruct(x) ((x *)calloc(sizeof(x),1)) -# define new(x,c) ((x *)malloc(sizeof(x)*(c))) -# define cnew(x,c) ((x *)calloc((c),sizeof(x))) -# define resize(var,x,c) ((x *)realloc((void*)(var),sizeof(x)*(c))) -# define varresize(var,x,bytes) ((x *)realloc((void*)(var),(bytes))) -# define free(x) free((void*)(x)) -#endif - -/* --- Debugging output ------------------------------------------------- */ - -/* Use the flag MAL_DEBUG to enable debug processing. - - The flag MAL_DEBUG_WITH_PYTHON can be used to indicate that the - object file will be linked with Python, so we can use Python APIs - for the debug processing here. 
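 For illustration only (not part of mxstdlib.h): a minimal sketch of the allocation wrappers defined above, in the same spirit as bm_init() in mxbmse.c using newstruct(); mx_thing and mx_thing_new() are hypothetical.

 typedef struct { int size; char *data; } mx_thing;

 static mx_thing *mx_thing_new(int size)
 {
     mx_thing *t = newstruct(mx_thing);      /* malloc(sizeof(mx_thing)) */

     if (t == NULL)
         return NULL;
     t->size = size;
     t->data = cnew(char, size);             /* calloc(size, sizeof(char)) */
     if (t->data == NULL) {
         free(t);                            /* the free() wrapper above */
         return NULL;
     }
     return t;
 }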
- -*/ -#ifdef MAL_DEBUG_WITH_PYTHON -# ifndef PYTHON_API_VERSION -# error "mx.h must be included when compiling with MAL_DEBUG_WITH_PYTHON" -# endif -# ifndef MAL_DEBUG -# define MAL_DEBUG -# endif -#else -# if defined(PYTHON_API_VERSION) && defined(MAL_DEBUG) -# define MAL_DEBUG_WITH_PYTHON -# endif -#endif - -/* Indicator for the availability of these interfaces: */ - -#define HAVE_MAL_DEBUG - -/* Name of the environment variable defining the log file name - to be used: */ - -#ifndef MAL_DEBUG_OUTPUTFILE_ENV_VARIABLE -# define MAL_DEBUG_OUTPUTFILE_ENV_VARIABLE "mxLogFile" -#endif - -/* File name to be used for debug logging (each object file using this - facility may set its own logging file) if no environment variable - is set: */ - -#ifndef MAL_DEBUG_OUTPUTFILE -# define MAL_DEBUG_OUTPUTFILE "mx.log" -#endif - -/* Name of the environment variable defining the log file prefix to be - used (e.g. to direct all log files into a separate directory): */ - -#ifndef MAL_DEBUG_OUTPUTFILEPREFIX_ENV_VARIABLE -# define MAL_DEBUG_OUTPUTFILEPREFIX_ENV_VARIABLE "mxLogFileDir" -#endif - -/* File name prefix to be used for log files, if no environment - variable is set: */ - -#ifndef MAL_DEBUG_OUTPUTFILEPREFIX -# define MAL_DEBUG_OUTPUTFILEPREFIX "" -#endif - -/* Log id to be used */ - -#ifndef MAL_DEBUG_LOGID -# define MAL_DEBUG_LOGID "New Log Session" -#endif - -/* Debug printf() API - - Output is written to a log file or stream. If the output file is - not yet open, the function will try to open the file as defined by - the environment or the program defines. The file remains open - until the program terminates. Subsequent changes to the environment - are not taken into account. - - The output file is deduced in the following way: - - 1. get the filename from the environment, revert to the predefined - value - - 2. get the filename prefix from the environment, revert to - the predefined value - - 3. if filename is one of "stderr" or "stdout" use the native - streams for output; otherwise try to open fileprefix + filename - reverting to stderr in case this fails. - - */ - -static -int mxDebugPrintf(const char *format, ...) -{ - va_list args; - static FILE *mxDebugPrintf_file; - - if (!mxDebugPrintf_file) { - time_t now; - char *filename,*fileprefix; - - now = time(NULL); - filename = getenv(MAL_DEBUG_OUTPUTFILE_ENV_VARIABLE); - if (!filename) - filename = MAL_DEBUG_OUTPUTFILE; - fileprefix = getenv(MAL_DEBUG_OUTPUTFILEPREFIX_ENV_VARIABLE); - if (!fileprefix) - fileprefix = MAL_DEBUG_OUTPUTFILEPREFIX; - if (strcmp(filename,"stdout") == 0) - mxDebugPrintf_file = stdout; - else if (strcmp(filename,"stderr") == 0) - mxDebugPrintf_file = stderr; - else { - char logfile[512]; - - strncpy(logfile,fileprefix,sizeof(logfile)); - strncat(logfile,filename,sizeof(logfile)); - mxDebugPrintf_file = fopen(logfile,"ab"); - if (!mxDebugPrintf_file) { - /* Hack to shut up "cc -Wall" warning that this - function is not used... 
*/ - static void *mxDebugPrintf_used; - mxDebugPrintf_used = (void *)mxDebugPrintf; - /* Default to stderr in case the log file cannot be - opened */ - mxDebugPrintf_file = stderr; - fprintf(mxDebugPrintf_file, - "\n*** Failed to open log file '%s'; " - "using stderr\n",logfile); - } - } - fprintf(mxDebugPrintf_file, - "\n--- "MAL_DEBUG_LOGID" --- %s\n", - ctime(&now)); - } - - va_start(args,format); - vfprintf(mxDebugPrintf_file,format,args); - fflush(mxDebugPrintf_file); - va_end(args); - return 1; -} - -#ifdef MAL_DEBUG - -# ifdef MAL_DEBUG_WITH_PYTHON -/* Use the Python debug flag to enable debugging output (python -d) */ -# define DPRINTF if (Py_DebugFlag) mxDebugPrintf -# define IF_DEBUGGING if (Py_DebugFlag) -# define DEBUGGING (Py_DebugFlag > 0) -# else - -/* Always output debugging information */ -# define DPRINTF mxDebugPrintf -# define IF_DEBUGGING -# define DEBUGGING (1) -# endif - -#else - -# ifndef _MSC_VER -/* This assumes that you are using an optimizing compiler which - eliminates the resulting debug code. */ -# define DPRINTF if (0) mxDebugPrintf -# define IF_DEBUGGING if (0) -# define DEBUGGING (0) -# else - -/* MSVC doesn't do a good job here, so we use a different approach. */ -# define DPRINTF 0 && mxDebugPrintf -# define IF_DEBUGGING if (0) -# define DEBUGGING (0) -# endif - -#endif - -/* --- Misc ------------------------------------------------------------- */ - -/* The usual bunch... */ -#ifndef max -# define max(a,b) ((a>b)?(a):(b)) -#endif -#ifndef min -# define min(a,b) ((aob_size;\ - returnCode = NULL_CODE;\ - loopcount = -1;\ - loopstart = startPosition;\ - taglist_len = PyList_Size( taglist );\ -} - -/* Macro to reset tag-specific variables - -*/ -#define RESET_TAG_VARIABLES {\ - childStart = position;\ - childPosition = position;\ - childReturnCode = NULL_CODE;\ - childResults = NULL;\ -} -/* Macro to decode a tag-entry into local variables */ -#define DECODE_TAG {\ - mxTagTableEntry *entry;\ - entry = &table->entry[index];\ - command = entry->cmd;\ - flags = entry->flags;\ - match = entry->args;\ - failureJump = entry->jne;\ - successJump = entry->je;\ - tagobj = entry->tagobj;\ - if (tagobj == NULL) { tagobj = Py_None;}\ -} - -/* macro to push relevant local variables onto the stack and setup for child table - newTable becomes table, newResults becomes taglist - - This is currently only called in the Table/SubTable family of commands, - could be inlined there, but I find it cleaner to read here. 
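 Not part of the original source: recursive_stack_entry is not defined in this excerpt, so the following is only a rough guess at its layout, inferred from the fields that PUSH_STACK and POP_STACK below read and write.

 typedef struct recursive_stack_entry {
     struct recursive_stack_entry *parent;  /* next entry to pop */
     int position;                          /* saved head position */
     int startPosition;                     /* saved start of current tag */
     int childStart;                        /* saved child start position */
     int index;                             /* saved table index */
     int resultsLength;                     /* saved len(taglist) */
     mxTagTableObject *table;               /* saved tag table */
     PyObject *results;                     /* saved taglist */
 } recursive_stack_entry;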
-*/ -#define PUSH_STACK( newTable, newResults ) {\ - stackTemp = (recursive_stack_entry *) PyMem_Malloc( sizeof( recursive_stack_entry ));\ - stackTemp->parent = stackParent;\ - stackTemp->position = position;\ - stackTemp->startPosition = startPosition;\ - stackTemp->table = table;\ - stackTemp->index = index;\ - stackTemp->childStart = childStart;\ - stackTemp->resultsLength = taglist_len;\ - stackTemp->results = taglist;\ - \ - stackParent = stackTemp;\ - childReturnCode = PENDING_CODE;\ - \ - startPosition = position;\ - table = (mxTagTableObject *) newTable;\ - taglist = newResults;\ -} -#define POP_STACK {\ - if (stackParent) {\ - childStart = stackParent->childStart;\ - childPosition = position;\ - position = stackParent->position;\ - \ - startPosition = stackParent->startPosition;\ - \ - childResults = taglist;\ - taglist_len = stackParent->resultsLength;\ - taglist = stackParent->results;\ - if (table != stackParent->table ) { Py_DECREF( table ); }\ - table = stackParent->table;\ - table_len = table->ob_size;\ - index = stackParent->index;\ - \ - stackTemp = stackParent->parent;\ - PyMem_Free( stackParent );\ - stackParent = stackTemp;\ - stackTemp = NULL;\ - \ - childReturnCode = returnCode;\ - returnCode = NULL_CODE;\ - }\ -} - - -#endif - -/* mxTextTools_TaggingEngine(): a table driven parser engine - - - return codes: returnCode = 2: match ok; returnCode = 1: match failed; returnCode = 0: error - - doesn't check type of passed arguments ! - - doesn't increment reference counts of passed objects ! -*/ - - - -int TE_ENGINE_API( - PyObject *textobj, - int sliceleft, - int sliceright, - mxTagTableObject *table, - PyObject *taglist, - PyObject *context, - int *next -) { - TE_CHAR *text = NULL; /* Pointer to the text object's data */ - - /* local variables pushed into stack on recurse */ - /* whole-table variables */ - int position = sliceleft; /* current (head) position in text for whole table */ - int startPosition = sliceleft; /* start position for current tag */ - int table_len = table->ob_size; /* table length */ - short returnCode = NULL_CODE; /* return code: -1 not set, 0 error, 1 - not ok, 2 ok */ - int index=0; /* index of current table entry */ - int taglist_len = PyList_Size( taglist ); - - - /* variables tracking status of the current tag */ - register short childReturnCode = NULL_CODE; /* the current child's return code value */ - int childStart = startPosition; - register int childPosition = startPosition; - PyObject *childResults = NULL; /* store's the current child's results (for table children) */ - int flags=0; /* flags set in command */ - int command=0; /* command */ - int failureJump=0; /* rel. jump distance on 'not matched', what should the default be? */ - int successJump=1; /* dito on 'matched', what should the default be? */ - PyObject *match=NULL; /* matching parameter */ - int loopcount = -1; /* loop counter */ - int loopstart = startPosition; /* loop start position */ - PyObject *tagobj = NULL; - - - /* parentTable is our nearest parent, i.e. the next item to pop - off the processing stack. We copied our local variables to it - before starting a child table, and will copy back from it when - we finish the child table. 
It's normally NULL - */ - recursive_stack_entry * stackParent = NULL; - recursive_stack_entry * stackTemp = NULL; /* just temporary storage for parent pointers */ - - /* Error-management variables */ - PyObject * errorType = NULL; - PyObject * errorMessage = NULL; - - /* Initialise the buffer - - Here is where we will add memory-mapped file support I think... - - expand the TE_STRING macros to check for mmap file objects - (only for str-type) and to access their values appropriately - f = open('c:\\temp\\test.mem', 'r') - buffer = mmap.mmap( f.fileno(), 0, access = mmap.ACCESS_READ ) - - */ - if (!TE_STRING_CHECK(textobj)) { - returnCode = ERROR_CODE; - errorType = PyExc_TypeError; - errorMessage = PyString_FromFormat( - "Expected a string or unicode object to parse: found %.50s", - textobj->ob_type->tp_name - ); - } else { - text = TE_STRING_AS_STRING(textobj); - if (text == NULL) { - returnCode = ERROR_CODE; - } - } - - while (1) { - /* this loop processes a whole table */ - while ( - (index < table_len) & - (returnCode == NULL_CODE) & - (index >= 0) - ) { - DPRINTF( "index %i\n", index ); - DECODE_TAG - if (childReturnCode == NULL_CODE ) { - /* if we are not continuing processing of the child - from a previous iteration we need to unpack the - child into local variables - */ - RESET_TAG_VARIABLES - childStart = position; - childPosition = position; - - } - if (command < MATCH_MAX_LOWLEVEL) { -#include "lowlevelcommands.h" - } else { - switch (command) { -/* Jumps & special commands */ -#include "speccommands.h" -/* non-table-recursion high-level stuff */ -#include "highcommands.h" -/* the recursive table commands */ -#include "recursecommands.h" - default: - { - childReturnCode = ERROR_CODE; - errorType = PyExc_ValueError; - errorMessage = PyString_FromFormat( - "Unrecognised command code %i", - command - ); - } - } - } - /* we're done a single tag, process partial results for the current child - - This is a major re-structuring point. Previously - all of this was scattered around (and duplicated among) - the various command and command-group clauses. - - There also used to be a function call to handle the - append/call functions. That's now handled inline - - */ - /* sanity check wanted by Marc-André for skip-before-buffer */ - if (childPosition < 0) { - childReturnCode = ERROR_CODE; - errorType = PyExc_TypeError; - errorMessage = PyString_FromFormat( - "tagobj (type %.50s) table entry %i moved/skipped beyond start of text (to position %i)", - tagobj->ob_type->tp_name, - index, - childPosition - ); - } - DPRINTF( "switch on return code %i\n", childReturnCode ); - switch(childReturnCode) { - case NULL_CODE: - case SUCCESS_CODE: - /* childReturnCode wasn't set or we positively matched - - positions are always: - childStart, childPosition - sub-results are: - childResults - unless childResults is taglist - in which case we use Py_None for the tag's children - unless childResults is NULL - in which case we create an empty list object - - we call: - tagobj == Py_None : - do nothing... 
- - [ result tuple needed ] - CallTag: - entry->tagobj( resultTuple ) - AppendToTagobj: - entry->tagobj.append( resultTuple ) - General Case: - taglist.append( resultTuple ) - - AppendMatch: - taglist.append( text[childStart:childPosition] ) - AppendTagobj: - taglist.append( entry->tagobj ) - - if LookAhead is specified: - childPosition is set to childStart before continuing - - finally we set position = childPosition - */ - { - PyObject * objectToCall = NULL; - PyObject * objectCallResult = NULL; - int releaseCallObject = 0; - int releaseChildResults = 0; - int releaseParameter = 1; - PyObject * parameter = NULL; - DPRINTF( "finishing success-code or null \n" ); - - if (tagobj == Py_None ) { - /* XXX note: this short-circuits around "AppendTagobj" flagged items which - specified tagobj == None... don't know if that's wanted or not. Similarly - doesn't report AppendMatch's. Not sure what's appropriate there either. - */ - DPRINTF( "tagobj was none\n" ); - DPRINTF( "Matched %i:%i but result not saved", childStart, childPosition ); - } else { - /* get the callable object */ - /* normally it's taglist.append, do the exceptions first */ - DPRINTF( "tagobj non-None, finding callable\n" ); - if (flags & MATCH_CALLTAG) { - /* want the tag itself */ - objectToCall = tagobj; - } else if (flags & MATCH_APPENDTAG) { - /* AppendToTagobj -> want the tag's append method */ - DPRINTF( "append to tag obj\n" ); - objectToCall = PyObject_GetAttrString( tagobj, "append" ); - DPRINTF( "got object\n"); - if (objectToCall == NULL) { - DPRINTF( "got invalid object\n"); - returnCode = ERROR_CODE; - errorType = PyExc_AttributeError; - errorMessage = PyString_FromFormat( - "tagobj (type %.50s) for table entry %i (flags include AppendTag) doesn't have an append method", - tagobj->ob_type->tp_name, - index - ); - } else { - DPRINTF( "got valid object\n"); - releaseCallObject = 1; - } - } else { - DPRINTF( "appending to tag-list\n" ); - /* append of the taglist, which we know exists, because it's a list - We optimise this to use the raw List API - */ - objectToCall = NULL; /*PyObject_GetAttrString( taglist, "append" );*/ - } - if (returnCode == NULL_CODE && objectToCall && PyCallable_Check(objectToCall)==0) { - /* object to call isn't callable */ - DPRINTF( "object not callable\n" ); - returnCode = ERROR_CODE; - errorType = PyExc_TypeError; - errorMessage = PyString_FromFormat( - "The object to call type(%.50s) for table entry %i isn't callable", - objectToCall->ob_type->tp_name, - index - ); - } - if (returnCode == NULL_CODE) { - /* get the parameter with which to call */ - /* normally it's a result tuple, do exceptions first */ - DPRINTF( "getting parameter\n" ); - if (flags & MATCH_APPENDMATCH) { - /* XXX need to do bounds-checking here - so that: - childStart >= sliceleft - childPosition >= sliceleft - childPosition <= sliceright - */ - /* MATCH_APPENDMATCH cannot occur with any - other flag (makes no sense) so objectToCall - _must_ be the taglist, and we just want to append - the string, not a tuple wrapping the string. 
That is, - everywhere else we use tuples, here we don't - */ - parameter = TE_STRING_FROM_STRING( - TE_STRING_AS_STRING(textobj) + childStart, - childPosition - childStart - ); - if (parameter == NULL) { - /* error occured getting parameter, report the exception */ - returnCode = ERROR_CODE; - } - } else if ( flags & MATCH_APPENDTAGOBJ) { - /* append the tagobj itself to the results list */ - if (tagobj == NULL) { - parameter = Py_None; - } else { - parameter = tagobj; - } - releaseParameter = 0; - } else { - /* need to know what the child-list is to build resultsTuple - if childResults is non-null and not taglist use it - if childResults == taglist, use Py_None - otherwise use Py_None ( originally we created a new empty list object, that was wrong :) ). - */ - if (childResults == taglist) { - childResults = Py_None ; - } else if (childResults != NULL) { - /* exists already, with a reference from PUSH's creation */ - releaseChildResults = 1; - } else { - /* turns out mxTextTools declares the return value to be - None or [], using None is far more efficient, so I've made - the code use it here */ - childResults = Py_None; - releaseChildResults = 0; /* we aren't increfing it locally */ - } - if (childResults == NULL || tagobj == NULL) { - returnCode = ERROR_CODE; - } else { - if (flags & MATCH_CALLTAG) { - parameter = Py_BuildValue( "OOiiO", taglist, textobj, childStart, childPosition, childResults ); - } else if (flags & MATCH_APPENDTAG) { - /* AppendToTagobj -> want to call append with a 4-tuple of values, so parameter needs to be ((x,y,z,w),) */ - /* XXX can't get the darn thing to accept "((OiiO))" :( */ - parameter = Py_BuildValue( - "((OiiO))", - Py_None, - childStart, - childPosition, - childResults - ); - } else { - /* either we are calling a method that requires the 4 args, or we're appending the 4-tuple to a list */ - parameter = Py_BuildValue( "OiiO", tagobj, childStart, childPosition, childResults ); - } - if (parameter == NULL) { - returnCode = ERROR_CODE; - } - } - } - DPRINTF( "done getting parameter\n" ); - if (parameter == NULL && returnCode == ERROR_CODE && errorType == NULL) { - errorType = PyExc_SystemError; - /* following may fail, as we may have run out of memory */ - errorMessage = PyString_FromFormat( - "Unable to build return-value tuple" - ); - } - /* now have both object and parameter and object is callable */ - if (returnCode == NULL_CODE) { - /* no errors yet */ - DPRINTF( "doing call\n" ); - if (objectToCall) { - DPRINTF( " object call\n" ); - /* explicit object to call */ - Py_INCREF( objectToCall ); - Py_INCREF( parameter ); - DPRINTF( " lock released\n" ); - objectCallResult = PyEval_CallObject( objectToCall, parameter ); - DPRINTF( " call finished\n" ); - Py_DECREF( objectToCall ); - Py_DECREF( parameter ); - DPRINTF( " lock acquired\n" ); - if (objectCallResult == NULL) { - DPRINTF( " null result\n" ); - returnCode = ERROR_CODE; - /* exception is already there, should alter error-handler to check for it */ - } else { - DPRINTF( " non-null result, decrefing\n" ); - Py_DECREF( objectCallResult ); - DPRINTF( " decrefd\n" ); - } - objectCallResult = NULL; - } else { - /* list steals reference */ - DPRINTF( " list append\n" ); - if (PyList_Append( taglist, parameter ) == -1) { - returnCode = ERROR_CODE; - /* list didn't steal ref yet */ - errorType = PyExc_SystemError; - /* following is likely to fail, as we've likely run out of memory */ - errorMessage = PyString_FromFormat( - "Unable to append result tuple to result list!" 
- ); - } - } - } - } - DPRINTF( "checking whether to release object\n" ); - if (releaseCallObject) { - Py_DECREF( objectToCall ); - } - objectToCall = NULL; - releaseCallObject = 0; - - if (releaseChildResults) { - Py_DECREF( childResults ); - } - childResults = NULL; - releaseChildResults = 0; - if (releaseParameter && parameter ) { - Py_DECREF( parameter ); - } - parameter = NULL; - releaseParameter = 1; - } /* ends the else clause for reporting a result */ - /* reset for lookahead */ - if (flags & MATCH_LOOKAHEAD) { - position = childStart; - } else { - position = childPosition; - } - index += successJump; - DPRINTF( "finished success-handler code\n" ); - break; - } - case FAILURE_CODE: - /* failed, if failure jump is default, should set table returnCode */ - if (childResults) { - if (childResults != taglist) { - /* different list, decref it since we won't be using it any more */ - Py_DECREF( childResults ); - } - childResults = NULL; - } - /* XXX possible (eventual) logic error here? - - fail with jump of 0 might work in certain cases where the - "parsing" is actually occuring outside of the current buffer - (i.e. a side-effect-based parsing node that fails X times before - finally succeeding). - - Don't see anything in current commands that can cause a problem - but we may need to make this an explicitly watched idea, rather - than a consequence of the child failing with a 0 failureJump value. - */ - position = childStart; - if (failureJump == 0) { - returnCode = 1; - } else { - index += failureJump; - } - break; - case PENDING_CODE: - /* the child tag hasn't begun parsing, this was a - recursive-tag-start loop pass. PENDING_CODE is set - by the stack push operation - */ - break; - case ERROR_CODE: - { - /* explicit error encountered while processing this child - - Handle this as gracefully as possible, potentially triggering - huge sets of operations, but therefore needing to be very careful - about system-level errors (such as memory errors). - - 1) Signal whole table as err-d - 2) Record any extra values for the error message? - */ - returnCode = ERROR_CODE; - break; - } - default: - { - /* what error should be raised when an un-recognised return code is generated? */ - returnCode = ERROR_CODE; - errorType = PyExc_SystemError; - errorMessage = PyString_FromFormat( - "An unknown child return code %i was generated by tag-table item %i", - childReturnCode, - index - ); - } - } - childReturnCode = NULL_CODE; - /* single entry processing loop complete */ - } - /* we're done the table, figure out what to do. */ - if (returnCode == NULL_CODE) { - /* no explicit return code was set, but done table: - - index went beyond table_len (>=table_len) -> success - index moved before table start (<= 0) -> failure - */ - if (index >= table_len) { - /* success */ - returnCode = SUCCESS_CODE; - } else if (position >= sliceright) { - /* EOF while parsing, special type of failure - - Eventually allow for returning the whole parse-stack - for restarting the parser from a particular point. 
- */ - /*returnCode = EOF_CODE;*/ - returnCode = FAILURE_CODE; - } else if (index < 0) { - /* explicit jump before table */ - returnCode = FAILURE_CODE; - } else { - returnCode = FAILURE_CODE; - } - } - if (returnCode == FAILURE_CODE) { - /* truncate result list */ - if (PyList_SetSlice( - taglist, - taglist_len, - PyList_Size(taglist), - NULL) - ) { - returnCode = ERROR_CODE; - errorMessage = PyString_FromFormat( - "Unable to truncate list object (likely tagging engine error) type(%.50s)", - taglist->ob_type->tp_name - ); - } - /* reset position */ - position = startPosition; - } - if (returnCode == ERROR_CODE) { - /* - DO_FANCY_ERROR_REPORTING( ); - - This is where we will do the user-triggered error reporting - (as well as reporting low-level errors such as memory/type/value). - - We have 3 values possibly available: - errorType -> PyObject * to current error class (or NULL) - if it is a MemoryError: - - Jettison some ballast then attempt to return a short - message. Need to create this ballast somewhere for that - to work. - - if is any other error class: - - create the error object and raise it - - decorate it with details: - - current table (need to incref to keep alive) - current index - current position - childStart - childPosition - - if it is simpleparse.stt.TextTools.ParsingError: - (triggered by the user in their grammar) - - create a list of non-None parent tagobjs (a stack - report) and add it to the object - - - - - - 3) Build an actual error object if possible? - 4) Report the parent hierarchy of the failure point - 5) - */ - char * msg = NULL; - if (errorMessage && errorType) { - /* we only report our own error if we've got all the information for it - - XXX Need to check that we don't have cases that are just setting type - */ - msg = PyString_AsString( errorMessage); - PyErr_SetString( errorType, msg ); - Py_DECREF( errorMessage ); - } - - - - /* need to free the whole stack at once */ - while (stackParent != NULL) { - /* this is inefficient, should do it all-in-one-go without copying values back - save for startPosition and returnCode in the last item*/ - POP_STACK - /* need to clean up all INCREF'd objects as we go... */ - if (childResults != taglist) { - /* different list, decref it since we won't be using it any more */ - Py_DECREF( childResults ); - } - childResults = NULL; - } - *next = startPosition; - return 0; - } else { - if (stackParent != NULL) { - /* pop stack also sets the childReturnCode for us... */ - POP_STACK - } else { - /* this was the root table, - return the final results */ - if (returnCode == FAILURE_CODE) { - /* there is a clause in the docs for tag that says - this will return the "error position" for the table. 
- That requires reporting childPosition for the the - last-matched position */ - *next = childPosition; - } else { - *next = position; - } - return returnCode; - } - } - } /* end of infinite loop */ -} - diff -Nru simpleparse-2.1.0a1/stt/TextTools/mxTextTools/mxTextTools.c simpleparse-2.2.0/stt/TextTools/mxTextTools/mxTextTools.c --- simpleparse-2.1.0a1/stt/TextTools/mxTextTools/mxTextTools.c 2006-02-19 01:03:47.000000000 +0000 +++ simpleparse-2.2.0/stt/TextTools/mxTextTools/mxTextTools.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,5288 +0,0 @@ -/* - mxTextTools -- Fast text manipulation routines - - Copyright (c) 2000, Marc-Andre Lemburg; mailto:mal@lemburg.com - Copyright (c) 2000-2002, eGenix.com Software GmbH; mailto:info@egenix.com -*/ - -/* We want all our symbols to be exported */ -#define MX_BUILDING_MXTEXTTOOLS - -/* Logging file used by debugging facility */ -#ifndef MAL_DEBUG_OUTPUTFILE -# define MAL_DEBUG_OUTPUTFILE "mxTextTools.log" -#endif - -#include "mx.h" -#include "mxTextTools.h" -#include - -#define VERSION "2.1.0" - -/* Initial list size used by e.g. setsplit(), setsplitx(),... */ -#define INITIAL_LIST_SIZE 64 - -/* Maximum TagTable cache size. If this limit is reached, the cache - is cleared to make room for new compile TagTables. */ -#define MAX_TAGTABLES_CACHE_SIZE 100 - -/* Define this to enable the copy-protocol (__copy__, __deepcopy__) */ -#define COPY_PROTOCOL - -/* --- module doc-string -------------------------------------------------- */ - -static char *Module_docstring = - - MXTEXTTOOLS_MODULE" -- Tools for fast text processing. Version "VERSION"\n\n" - - "Copyright (c) 1997-2000, Marc-Andre Lemburg; mailto:mal@lemburg.com\n" - "Copyright (c) 2000-2002, eGenix.com Software GmbH; mailto:info@egenix.com\n\n" - "Copyright (c) 2003-2006, Mike Fletcher; mailto:mcfletch@vrplumber.com\n\n" - - " All Rights Reserved\n\n" - "See the documentation for further information on copyrights,\n" - "or contact the author." -; - -/* --- internal macros ---------------------------------------------------- */ - -/* --- module globals ----------------------------------------------------- */ - -/* Translation strings for the 8-bit versions of lower() and upper() */ -static PyObject *mx_ToUpper; -static PyObject *mx_ToLower; - -static PyObject *mxTextTools_Error; /* mxTextTools specific error */ - -static PyObject *mxTextTools_TagTables; /* TagTable cache dictionary */ - -/* Flag telling us whether the module was initialized or not. */ -static int mxTextTools_Initialized = 0; - -/* --- forward declarations ----------------------------------------------- */ - -/* --- module helper ------------------------------------------------------ */ - -static -PyObject *mxTextTools_ToUpper(void) -{ - char tr[256]; - int i; - - for (i = 0; i < 256; i++) - tr[i] = toupper((char)i); - return PyString_FromStringAndSize(tr,sizeof(tr)); -} - -static -PyObject *mxTextTools_ToLower(void) -{ - char tr[256]; - int i; - - for (i = 0; i < 256; i++) - tr[i] = tolower((char)i); - return PyString_FromStringAndSize(tr,sizeof(tr)); -} - -/* Create an exception object, insert it into the module dictionary - under the given name and return the object pointer; this is NULL in - case an error occurred. base can be given to indicate the base - object to be used by the exception object. 
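Editorial note: mxTextTools_ToUpper() and mxTextTools_ToLower() above just precompute 256-character translation strings (index is the ordinal, value is the translated character) that the module exposes for use as translate tables. A rough Python equivalent, restricted to plain ASCII case mapping rather than the C library's locale-aware toupper()/tolower():

    # Hypothetical reconstruction; the C version defers to toupper()/tolower().
    to_upper = ''.join(chr(i).upper() if 'a' <= chr(i) <= 'z' else chr(i)
                       for i in range(256))
    to_lower = ''.join(chr(i).lower() if 'A' <= chr(i) <= 'Z' else chr(i)
                       for i in range(256))
    assert len(to_upper) == len(to_lower) == 256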
It should be NULL - otherwise */ - -static -PyObject *insexc(PyObject *moddict, - char *name, - PyObject *base) -{ - PyObject *v; - char fullname[256]; - char *modname; - char *dot; - - v = PyDict_GetItemString(moddict, "__name__"); - if (v == NULL) - modname = NULL; - else - modname = PyString_AsString(v); - if (modname == NULL) { - PyErr_Clear(); - modname = MXTEXTTOOLS_MODULE; - } - /* The symbols from this extension are imported into - simpleparse.stt.TextTools. We trim the name to not confuse the user with an - overly long package path. */ - strcpy(fullname, modname); - dot = strchr(fullname, '.'); - if (dot) - dot = strchr(dot+1, '.'); - if (dot) - strcpy(dot+1, name); - else - sprintf(fullname, "%s.%s", modname, name); - - v = PyErr_NewException(fullname, base, NULL); - if (v == NULL) - return NULL; - if (PyDict_SetItemString(moddict,name,v)) - return NULL; - return v; -} - -/* Helper for adding integer constants to a dictionary. Check for - errors with PyErr_Occurred() */ -static -void insint(PyObject *dict, - char *name, - int value) -{ - PyObject *v = PyInt_FromLong((long)value); - PyDict_SetItemString(dict, name, v); - Py_XDECREF(v); -} - -/* --- module interface --------------------------------------------------- */ - -/* --- Text Search Object ----------------------------------------------*/ - -staticforward PyMethodDef mxTextSearch_Methods[]; - -/* allocation */ - -static -PyObject *mxTextSearch_New(PyObject *match, - PyObject *translate, - int algorithm) -{ - mxTextSearchObject *so; - - so = PyObject_NEW(mxTextSearchObject, &mxTextSearch_Type); - if (so == NULL) - return NULL; - so->data = NULL; - so->translate = NULL; - so->match = NULL; - - Py_INCREF(match); - so->match = match; - - if (translate == Py_None) - translate = NULL; - else if (translate) { - Py_Assert(PyString_Check(translate), - PyExc_TypeError, - "translate table must be a string"); - Py_Assert(PyString_GET_SIZE(translate) == 256, - PyExc_TypeError, - "translate string must have exactly 256 chars"); - Py_INCREF(translate); - } - so->translate = translate; - - /* Init algorithm */ - so->algorithm = algorithm; - switch (algorithm) { - - case MXTEXTSEARCH_BOYERMOORE: - Py_Assert(PyString_Check(match), - PyExc_TypeError, - "match must be a string for Boyer-Moore"); - so->data = bm_init(PyString_AS_STRING(match), - PyString_GET_SIZE(match)); - Py_Assert(so->data != NULL, - PyExc_TypeError, - "error initializing the search object"); - break; - -#ifdef MXFASTSEARCH - case MXTEXTSEARCH_FASTSEARCH: - Py_Assert(PyString_Check(match), - PyExc_TypeError, - "match must be a string for FastSearch"); - so->data = fs_init(PyString_AS_STRING(match), - PyString_GET_SIZE(match)); - Py_Assert(so->data != NULL, - PyExc_TypeError, - "error initializing the search object"); - break; -#endif - - case MXTEXTSEARCH_TRIVIAL: - Py_Assert(PyString_Check(match) || PyUnicode_Check(match), - PyExc_TypeError, - "match must be a string or unicode"); - Py_Assert(so->translate == NULL, - PyExc_TypeError, - "trivial search algorithm does not support translate"); - break; - - default: - Py_Error(PyExc_ValueError, - "unknown or unsupported algorithm"); - - } - return (PyObject *)so; - - onError: - Py_DECREF(so); - return NULL; -} - -Py_C_Function_WithKeywords( - mxTextSearch_TextSearch, - "TextSearch(match[,translate=None,algorithm=default_algorithm])\n\n" - "Create a substring search object for the string match;\n" - "translate is an optional translate-string like the one used\n" - "in the module re." 
- ) -{ - PyObject *match = 0; - PyObject *translate = 0; - int algorithm = -424242; - - Py_KeywordsGet3Args("O|Oi:TextSearch",match,translate,algorithm); - - if (algorithm == -424242) { - if (PyUnicode_Check(match)) - algorithm = MXTEXTSEARCH_TRIVIAL; - else -#ifdef MXFASTSEARCH - algorithm = MXTEXTSEARCH_BOYERMOORE; -#else - algorithm = MXTEXTSEARCH_BOYERMOORE; -#endif - } - return mxTextSearch_New(match, translate, algorithm); - - onError: - return NULL; -} - -static -void mxTextSearch_Free(mxTextSearchObject *so) -{ - if (so->data) { - switch (so->algorithm) { - - case MXTEXTSEARCH_BOYERMOORE: - bm_free(so->data); - break; - -#ifdef MXFASTSEARCH - case MXTEXTSEARCH_FASTSEARCH: - fs_free(so->data); - break; -#endif - case MXTEXTSEARCH_TRIVIAL: - break; - - } - } - Py_XDECREF(so->match); - Py_XDECREF(so->translate); - PyObject_Del(so); -} - -/* C APIs */ - -#define so ((mxTextSearchObject *)self) - -/* Get the match length from an TextSearch object or -1 in case of an - error. */ - -int mxTextSearch_MatchLength(PyObject *self) -{ - Py_Assert(mxTextSearch_Check(self), - PyExc_TypeError, - "expected a TextSearch object"); - - switch (so->algorithm) { - - case MXTEXTSEARCH_BOYERMOORE: - return BM_MATCH_LEN(so->data); - break; - -#ifdef MXFASTSEARCH - case MXTEXTSEARCH_FASTSEARCH: - return FS_MATCH_LEN(so->data); - break; -#endif - - case MXTEXTSEARCH_TRIVIAL: - if (PyString_Check(so->match)) - return PyString_GET_SIZE(so->match); -#ifdef HAVE_UNICODE - else if (PyUnicode_Check(so->match)) - return PyUnicode_GET_SIZE(so->match); -#endif - break; - - } - - Py_Error(mxTextTools_Error, - "internal error"); - - onError: - return -1; -} - -static -int trivial_search(const char *text, - int start, - int stop, - const char *match, - int match_len) -{ - int ml1 = match_len - 1; - register const char *tx = &text[start]; - register int x = start; - - if (ml1 < 0) - return start; - - /* Brute-force method; from right to left */ - for (;;) { - register int j = ml1; - register const char *mj = &match[j]; - - if (x + j >= stop) - /* reached eof: no match */ - return start; - - /* scan from right to left */ - for (tx += j; j >= 0 && *tx == *mj; - tx--, mj--, j--) ; - - if (j < 0) { - /* found */ - x += ml1 + 1; - return x; - } - /* not found: rewind and advance one char */ - tx -= j - 1; - x++; - } - return start; -} - -#ifdef HAVE_UNICODE -static -int trivial_unicode_search(const Py_UNICODE *text, - int start, - int stop, - const Py_UNICODE *match, - int match_len) -{ - int ml1 = match_len - 1; - register const Py_UNICODE *tx = &text[start]; - register int x = start; - - if (ml1 < 0) - return start; - - /* Brute-force method; from right to left */ - for (;;) { - register int j = ml1; - register const Py_UNICODE *mj = &match[j]; - - if (x + j >= stop) - /* reached eof: no match */ - return start; - - /* scan from right to left */ - for (tx += j; j >= 0 && *tx == *mj; - tx--, mj--, j--) ; - - if (j < 0) { - /* found */ - x += ml1 + 1; - return x; - } - /* not found: rewind and advance one char */ - tx -= j - 1; - x++; - } - return start; -} -#endif - -/* Search for the match in text[start:stop]. - - Returns 1 in case a match was found and sets sliceleft, sliceright - to the matching slice. - - Returns 0 in case no match was found and -1 in case of an error. 
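Editorial note: since the constructor and the buffer-search routine above define the object's observable behaviour, a short usage sketch may help. It assumes the usual re-export into simpleparse.stt.TextTools noted in the module comment, and the return values follow the method docstrings further down:

    from simpleparse.stt.TextTools import TextSearch

    ts = TextSearch('world')          # defaults to Boyer-Moore for an 8-bit match string
    ts.search('hello world')          # -> (6, 11), or (start, start) when not found
    ts.find('hello world')            # -> 6, or -1 when not found
    ts.findall('world world')         # -> [(0, 5), (6, 11)]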
- -*/ - -int mxTextSearch_SearchBuffer(PyObject *self, - char *text, - int start, - int stop, - int *sliceleft, - int *sliceright) -{ - int nextpos; - int match_len; - - Py_Assert(mxTextSearch_Check(self), - PyExc_TypeError, - "expected a TextSearch object"); - - switch (so->algorithm) { - - case MXTEXTSEARCH_BOYERMOORE: - if (so->translate) { - /* search with translate table */ - nextpos = bm_tr_search((mxbmse_data *)so->data, - text, - start, - stop, - PyString_AS_STRING(so->translate)); - } - else { - /* exact search */ - nextpos = bm_search((mxbmse_data *)so->data, - text, - start, - stop); - } - match_len = BM_MATCH_LEN(so->data); - break; - -#ifdef MXFASTSEARCH - case MXTEXTSEARCH_FASTSEARCH: - if (so->translate) { - /* search with translate table */ - nextpos = fs_tr_search((mxfse_data *)so->data, - text, - start, - stop, - PyString_AS_STRING(so->translate)); - } - else { - /* exact search */ - nextpos = fs_search((mxfse_data *)so->data, - text, - start, - stop); - } - match_len = FS_MATCH_LEN(so->data); - break; -#endif - - case MXTEXTSEARCH_TRIVIAL: - { - const char *match; - - if (PyString_Check(so->match)) { - match = PyString_AS_STRING(so->match); - match_len = PyString_GET_SIZE(so->match); - } - else if (PyObject_AsCharBuffer(so->match, &match, &match_len)) - goto onError; - nextpos = trivial_search(text, - start, - stop, - match, - match_len); - } - break; - - default: - Py_Error(mxTextTools_Error, - "unknown algorithm type in mxTextSearch_SearchBuffer"); - - } - /* Found ? */ - if (nextpos != start) { - if (sliceleft) - *sliceleft = nextpos - match_len; - if (sliceright) - *sliceright = nextpos; - return 1; - } - /* Not found */ - return 0; - - onError: - return -1; -} - -#ifdef HAVE_UNICODE -int mxTextSearch_SearchUnicode(PyObject *self, - Py_UNICODE *text, - int start, - int stop, - int *sliceleft, - int *sliceright) -{ - int nextpos; - int match_len; - - Py_Assert(mxTextSearch_Check(self), - PyExc_TypeError, - "expected a TextSearch object"); - - switch (so->algorithm) { - - case MXTEXTSEARCH_BOYERMOORE: - Py_Error(PyExc_TypeError, - "Boyer-Moore search algorithm does not support Unicode"); - break; - -#ifdef MXFASTSEARCH - case MXTEXTSEARCH_FASTSEARCH: - Py_Error(PyExc_TypeError, - "FastSearch search algorithm does not support Unicode"); -#endif - - case MXTEXTSEARCH_TRIVIAL: - { - PyObject *u; - Py_UNICODE *match; - - if (PyUnicode_Check(so->match)) { - u = NULL; - match = PyUnicode_AS_UNICODE(so->match); - match_len = PyUnicode_GET_SIZE(so->match); - } - else { - u = PyUnicode_FromEncodedObject(so->match, NULL, NULL); - if (u == NULL) - goto onError; - match = PyUnicode_AS_UNICODE(u); - match_len = PyUnicode_GET_SIZE(u); - } - nextpos = trivial_unicode_search(text, - start, - stop, - match, - match_len); - Py_XDECREF(u); - } - break; - - default: - Py_Error(mxTextTools_Error, - "unknown algorithm type in mxTextSearch_SearchUnicode"); - - } - /* Found ? 
*/ - if (nextpos != start) { - if (sliceleft) - *sliceleft = nextpos - match_len; - if (sliceright) - *sliceright = nextpos; - return 1; - } - /* Not found */ - return 0; - - onError: - return -1; -} -#endif - -/* methods */ - -Py_C_Function( mxTextSearch_search, - "TextSearch.search(text,start=0,stop=len(text))\n\n" - "Search for the substring in text, looking only at the\n" - "slice [start:stop] and return the slice (l,r)\n" - "where the substring was found, (start,start) otherwise.") -{ - PyObject *text; - int start = 0; - int stop = INT_MAX; - int sliceleft, sliceright; - int rc; - - Py_Get3Args("O|ii:TextSearch.search", - text,start,stop); - - if (PyString_Check(text)) { - Py_CheckStringSlice(text, start, stop); - rc = mxTextSearch_SearchBuffer(self, - PyString_AS_STRING(text), - start, - stop, - &sliceleft, - &sliceright); - } -#ifdef HAVE_UNICODE - else if (PyUnicode_Check(text)) { - Py_CheckUnicodeSlice(text, start, stop); - rc = mxTextSearch_SearchUnicode(self, - PyUnicode_AS_UNICODE(text), - start, - stop, - &sliceleft, - &sliceright); - } -#endif - else - Py_Error(PyExc_TypeError, - "expected string or unicode"); - if (rc < 0) - goto onError; - if (rc == 0) { - sliceleft = start; - sliceright = start; - } - - /* Return the slice */ - Py_Return2("ii", sliceleft, sliceright); - - onError: - return NULL; -} - -Py_C_Function( mxTextSearch_find, - "TextSearch.find(text,start=0,stop=len(text))\n\n" - "Search for the substring in text, looking only at the\n" - "slice [start:stop] and return the index\n" - "where the substring was found, -1 otherwise.") -{ - PyObject *text; - int start = 0; - int stop = INT_MAX; - int sliceleft, sliceright; - int rc; - - Py_Get3Args("O|ii:TextSearch.find", - text,start,stop); - - if (PyString_Check(text)) { - Py_CheckStringSlice(text, start, stop); - rc = mxTextSearch_SearchBuffer(self, - PyString_AS_STRING(text), - start, - stop, - &sliceleft, - &sliceright); - } -#ifdef HAVE_UNICODE - else if (PyUnicode_Check(text)) { - Py_CheckUnicodeSlice(text, start, stop); - rc = mxTextSearch_SearchUnicode(self, - PyUnicode_AS_UNICODE(text), - start, - stop, - &sliceleft, - &sliceright); - } -#endif - else - Py_Error(PyExc_TypeError, - "expected string or unicode"); - if (rc < 0) - goto onError; - if (rc == 0) - sliceleft = -1; - return PyInt_FromLong(sliceleft); - - onError: - return NULL; -} - -Py_C_Function( mxTextSearch_findall, - "TextSearch.findall(text,start=0,stop=len(text))\n\n" - "Search for the substring in text, looking only at the\n" - "slice [start:stop] and return a list of all\n" - "non overlapping slices (l,r) in text where the match\n" - "string can be found.") -{ - PyObject *text; - PyObject *list = 0; - int start = 0; - int stop = INT_MAX; - int stop_index; - int match_len; - int listsize = INITIAL_LIST_SIZE; - int listitem = 0; - - Py_Get3Args("O|ii:TextSearch.findall", - text,start,stop); - - if (PyString_Check(text)) { - Py_CheckStringSlice(text, start, stop); - } -#ifdef HAVE_UNICODE - else if (PyUnicode_Check(text)) { - Py_CheckUnicodeSlice(text, start, stop); - } -#endif - else - Py_Error(PyExc_TypeError, - "expected string or unicode"); - - list = PyList_New(listsize); - if (!list) - goto onError; - - match_len = mxTextSearch_MatchLength(self); - if (match_len < 0) - goto onError; - stop_index = stop - match_len; - - while (start <= stop_index) { - register PyObject *t,*v; - int rc; - int sliceleft, sliceright; - - /* exact search */ - if (PyString_Check(text)) - rc = mxTextSearch_SearchBuffer(self, - PyString_AS_STRING(text), - start, - 
stop, - &sliceleft, - &sliceright); -#ifdef HAVE_UNICODE - else if (PyUnicode_Check(text)) - rc = mxTextSearch_SearchUnicode(self, - PyUnicode_AS_UNICODE(text), - start, - stop, - &sliceleft, - &sliceright); -#endif - else - break; - if (rc < 0) - goto onError; - if (rc == 0) - break; - - /* Build slice and append to list */ - t = PyTuple_New(2); - if (!t) - goto onError; - v = PyInt_FromLong(sliceleft); - if (!v) - goto onError; - PyTuple_SET_ITEM(t,0,v); - v = PyInt_FromLong(sliceright); - if (!v) - goto onError; - PyTuple_SET_ITEM(t,1,v); - - if (listitem < listsize) - PyList_SET_ITEM(list, listitem, t); - else { - PyList_Append(list, t); - Py_DECREF(t); - } - listitem++; - - start = sliceright; - } - - /* Resize list if necessary */ - if (listitem < listsize) - PyList_SetSlice(list, listitem, listsize, (PyObject*)NULL); - - return list; - - onError: - Py_XDECREF(list); - return NULL; -} - -#ifdef COPY_PROTOCOL -Py_C_Function( mxTextSearch_copy, - "copy([memo])\n\n" - "Return a new reference for the instance. This function\n" - "is used for the copy-protocol. Real copying doesn't take\n" - "place, since the instances are immutable.") -{ - PyObject *memo; - - Py_GetArg("|O",memo); - Py_INCREF(so); - return (PyObject *)so; - onError: - return NULL; -} -#endif - -#undef so - -/* --- slots --- */ - -static -PyObject *mxTextSearch_Repr(mxTextSearchObject *self) -{ - char *algoname; - PyObject *v; - char t[500], *reprstr; - - v = PyObject_Repr(self->match); - if (v == NULL) - return NULL; - reprstr = PyString_AsString(v); - if (reprstr == NULL) - return NULL; - - switch (self->algorithm) { - case MXTEXTSEARCH_BOYERMOORE: - algoname = "Boyer-Moore"; - break; -#ifdef MXFASTSEARCH - case MXTEXTSEARCH_FASTSEARCH: - algoname = "FastSearch"; - break; -#endif - case MXTEXTSEARCH_TRIVIAL: - algoname = "Trivial"; - break; - default: - algoname = ""; - } - - sprintf(t, "<%.50s TextSearch object for %.400s at 0x%lx>", - algoname, reprstr, (long)self); - Py_DECREF(v); - return PyString_FromString(t); -} - -static -PyObject *mxTextSearch_GetAttr(mxTextSearchObject *self, - char *name) -{ - PyObject *v; - - if (Py_WantAttr(name,"match")) { - v = self->match; - Py_INCREF(v); - return v; - } - else if (Py_WantAttr(name,"translate")) { - v = self->translate; - if (v == NULL) - v = Py_None; - Py_INCREF(v); - return v; - } - else if (Py_WantAttr(name,"algorithm")) - return PyInt_FromLong(self->algorithm); - else if (Py_WantAttr(name,"__members__")) - return Py_BuildValue("[sss]", - "match", "translate", "algorithm"); - - return Py_FindMethod(mxTextSearch_Methods, (PyObject *)self, (char *)name); -} - -/* Python Type Table */ - -PyTypeObject mxTextSearch_Type = { - PyObject_HEAD_INIT(0) /* init at startup ! 
*/ - 0, /*ob_size*/ - "TextSearch", /*tp_name*/ - sizeof(mxTextSearchObject), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - /* methods */ - (destructor)mxTextSearch_Free, /*tp_dealloc*/ - (printfunc)0, /*tp_print*/ - (getattrfunc)mxTextSearch_GetAttr, /*tp_getattr*/ - (setattrfunc)0, /*tp_setattr*/ - (cmpfunc)0, /*tp_compare*/ - (reprfunc)mxTextSearch_Repr, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_number*/ - 0, /*tp_as_mapping*/ - (hashfunc)0, /*tp_hash*/ - (ternaryfunc)0, /*tp_call*/ - (reprfunc)0, /*tp_str*/ - (getattrofunc)0, /*tp_getattro*/ - (setattrofunc)0, /*tp_setattro*/ -}; - -/* Python Method Table */ - -statichere -PyMethodDef mxTextSearch_Methods[] = -{ - Py_MethodListEntry("search",mxTextSearch_search), - Py_MethodListEntry("find",mxTextSearch_find), - Py_MethodListEntry("findall",mxTextSearch_findall), -#ifdef COPY_PROTOCOL - Py_MethodListEntry("__deepcopy__",mxTextSearch_copy), - Py_MethodListEntry("__copy__",mxTextSearch_copy), -#endif - {NULL,NULL} /* end of list */ -}; - -/* --- Character Set Object --------------------------------------------*/ - -staticforward PyMethodDef mxCharSet_Methods[]; - -/* internal */ - -/* 8-bit character sets are implemented using a simple 32-byte - long bitmap with one bit per character. - - Addressing is done as follows: - - def char_is_set(ordinal): - return bitmap[ordinal >> 3] & (1 << (ordinal & 7)) - -*/ - -#define STRING_CHARSET_SIZE 256 -#define STRING_CHARSET_BITMAP_SIZE (STRING_CHARSET_SIZE / 8) - -typedef struct { - unsigned char bitmap[STRING_CHARSET_BITMAP_SIZE]; - /* character bitmap */ -} string_charset; - -static -int init_string_charset(mxCharSetObject *cs, - PyObject *definition) -{ - register int i, j; - char *def = PyString_AS_STRING(definition); - const int len = PyString_GET_SIZE(definition); - string_charset *lookup = 0; - register unsigned char *bitmap; - int logic = 1; - - /* Handle logic change (first char is '^' for negative matching) */ - if (len > 0 && def[0] == '^') { - logic = 0; - i = 1; - } - else - i = 0; - - /* Build 32-byte lookup bitmap (one bit per character) */ - lookup = (string_charset *)PyMem_Malloc(sizeof(string_charset)); - if (lookup == NULL) { - PyErr_NoMemory(); - goto onError; - } - memset(lookup, 0, sizeof(string_charset)); - cs->mode = MXCHARSET_8BITMODE; - cs->lookup = (void *)lookup; - bitmap = lookup->bitmap; - - for (; i < len; i++) { - - /* Handle escapes: "b\-d", "\\" */ - if (def[i] == '\\') { - if (i < len - 1 && def[i+1] == '\\') { - j = (unsigned char)'\\'; - bitmap[j >> 3] |= 1 << (j & 7); - i++; - } - continue; - } - - /* Handle ranges: "b-d", "\\-z", "\--z" */ - if (i < len - 2 && def[i+1] == '-') { - unsigned char range_left = def[i]; - unsigned char range_right = def[i+2]; - for (j = range_left; j <= range_right; j++) - bitmap[j >> 3] |= 1 << (j & 7); - i++; - continue; - } - - /* Normal processing */ - j = (unsigned char)def[i]; - bitmap[j >> 3] |= 1 << (j & 7); - } - - /* Invert bitmap if negative matching is requested */ - if (!logic) { - DPRINTF("init_string_charset: inverting bitmap\n"); - for (i = 0; i < STRING_CHARSET_BITMAP_SIZE; i++) - bitmap[i] ^= 0xFF; - } - - return 0; - - onError: - if (lookup) - PyMem_Free((void *)lookup); - cs->lookup = 0; - return -1; -} - -#ifdef HAVE_UNICODE - -/* Unicode character sets are implemented using two step indexing - which is a good compromise between lookup speed and memory usage. - - Lookup is done using a variable length array of 32-byte bitmap - blocks. There can be 256 such blocks. 
Identical blocks are - collapsed into a single copy. - - Addressing is done as follows: - - def char_is_set(ordinal): - index = bitmapindex[ordinal >> 8] - bitmap = bitmaps[index] - return bitmap[(ordinal >> 3) & 31] & (1 << (ordinal & 7)) - - The technique used here is very similar to what is done in Python's - SRE (see the BIGCHARSET patch by Martin von Loewis). Compression - should be reasonably good since character sets in practice usually - only contains a few single characters or longer ranges of Unicode - characters. - -*/ - -#define UNICODE_CHARSET_SIZE 65536 -#define UNICODE_CHARSET_BITMAP_SIZE 32 -#define UNICODE_CHARSET_BITMAPS (UNICODE_CHARSET_SIZE / (UNICODE_CHARSET_BITMAP_SIZE * 8)) -#define UNICODE_CHARSET_BIGMAP_SIZE (UNICODE_CHARSET_SIZE / 8) - -typedef struct { - unsigned char bitmapindex[UNICODE_CHARSET_BITMAPS]; - /* Index to char bitmaps */ - unsigned char bitmaps[UNICODE_CHARSET_BITMAPS][UNICODE_CHARSET_BITMAP_SIZE]; - /* Variable length bitmap array */ -} unicode_charset; - -static -int init_unicode_charset(mxCharSetObject *cs, - PyObject *definition) -{ - register int i, j; - Py_UNICODE *def = PyUnicode_AS_UNICODE(definition); - const int len = PyUnicode_GET_SIZE(definition); - unicode_charset *lookup = 0; - unsigned char bigmap[UNICODE_CHARSET_BIGMAP_SIZE]; - int blocks; - int logic = 1; - - /* Handle logic change (first char is '^' for negative matching) */ - if (len > 0 && def[0] == '^') { - logic = 0; - i = 1; - } - else - i = 0; - - /* Build bigmap */ - memset(bigmap, 0, sizeof(bigmap)); - for (; i < len; i++) { - - /* Handle escapes: "b\-d", "\\" */ - if (def[i] == '\\') { - if (i < len - 1 && def[i+1] == '\\') { - j = (int)'\\'; - bigmap[j >> 3] |= 1 << (j & 7); - i++; - } - continue; - } - - /* Handle ranges: "b-d", "\\-z", "\--z" */ - if (i < len - 2 && def[i+1] == '-') { - Py_UNICODE range_left = def[i]; - Py_UNICODE range_right = def[i+2]; - if (range_right >= UNICODE_CHARSET_SIZE) { - Py_Error(PyExc_ValueError, - "unicode ordinal out of supported range"); - } - for (j = range_left; j <= range_right; j++) - bigmap[j >> 3] |= 1 << (j & 7); - i++; - continue; - } - - /* Normal processing */ - j = def[i]; - if (j >= UNICODE_CHARSET_SIZE) { - Py_Error(PyExc_ValueError, - "unicode ordinal out of supported range"); - } - bigmap[j >> 3] |= 1 << (j & 7); - } - - /* Build lookup table - - XXX Could add dynamic resizing here... probably not worth it - though, since sizeof(unicode_charset) isn't all that large. 
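Editorial note: the two addressing schemes described in the comments above translate almost directly into Python. A sketch, with the bitmaps represented as sequences of small integers and illustrative function names:

    def char_in_8bit_set(bitmap, ch):
        # bitmap: 32 bytes, one bit per 8-bit character
        o = ord(ch)
        return bool(bitmap[o >> 3] & (1 << (o & 7)))

    def char_in_ucs2_set(bitmapindex, bitmaps, ch):
        # two-step lookup: bitmapindex picks one of up to 256 shared 32-byte blocks
        o = ord(ch)
        block = bitmaps[bitmapindex[o >> 8]]
        return bool(block[(o >> 3) & 31] & (1 << (o & 7)))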
- - */ - lookup = (unicode_charset *)PyMem_Malloc(sizeof(unicode_charset)); - if (lookup == NULL) { - PyErr_NoMemory(); - goto onError; - } - blocks = 0; - for (i = UNICODE_CHARSET_BITMAPS - 1; i >= 0; i--) { - unsigned char *block = &bigmap[i << 5]; - for (j = blocks - 1; j >= 0; j--) - if (memcmp(lookup->bitmaps[j], block, - UNICODE_CHARSET_BITMAP_SIZE) == 0) - break; - if (j < 0) { - j = blocks; - DPRINTF("init_unicode_charset: Creating new block %i for %i\n", - j, i); - memcpy(lookup->bitmaps[j], block, UNICODE_CHARSET_BITMAP_SIZE); - blocks++; - } - else - DPRINTF("init_unicode_charset: Reusing block %i for %i\n", j, i); - lookup->bitmapindex[i] = j; - } - DPRINTF("init_unicode_charset: Map size: %i block(s) = %i bytes\n", - blocks, UNICODE_CHARSET_BITMAPS + - blocks * UNICODE_CHARSET_BITMAP_SIZE); - lookup = (unicode_charset *)PyMem_Realloc(lookup, - UNICODE_CHARSET_BITMAPS - + blocks * UNICODE_CHARSET_BITMAP_SIZE); - if (lookup == NULL) { - PyErr_NoMemory(); - goto onError; - } - - /* Invert bitmaps if negative matching is requested */ - if (!logic) { - register unsigned char *bitmap = &lookup->bitmaps[0][0]; - DPRINTF("init_unicode_charset: inverting bitmaps\n"); - for (i = 0; i < blocks * UNICODE_CHARSET_BITMAP_SIZE; i++) - bitmap[i] ^= 0xFF; - } - - cs->mode = MXCHARSET_UCS2MODE; - cs->lookup = (void *)lookup; - return 0; - - onError: - if (lookup) - PyMem_Free((void *)lookup); - cs->lookup = 0; - return -1; -} - -#endif - -/* allocation */ - -static -PyObject *mxCharSet_New(PyObject *definition) -{ - mxCharSetObject *cs; - - cs = PyObject_NEW(mxCharSetObject, &mxCharSet_Type); - if (cs == NULL) - return NULL; - Py_INCREF(definition); - cs->definition = definition; - cs->lookup = NULL; - cs->mode = -1; - - if (PyString_Check(definition)) { - if (init_string_charset(cs, definition)) - goto onError; - } -#ifdef HAVE_UNICODE - else if (PyUnicode_Check(definition)) { - if (init_unicode_charset(cs, definition)) - goto onError; - } -#endif - else - Py_Error(PyExc_TypeError, - "character set definition must be string or unicode"); - - return (PyObject *)cs; - - onError: - Py_DECREF(cs); - return NULL; -} - -Py_C_Function( mxCharSet_CharSet, - "CharSet(definition)\n\n" - "Create a character set matching object from the string" - ) -{ - PyObject *definition; - - Py_GetArg("O:CharSet", definition); - return mxCharSet_New(definition); - - onError: - return NULL; -} - -static -void mxCharSet_Free(mxCharSetObject *cs) -{ - Py_XDECREF(cs->definition); - if (cs->lookup) - PyMem_Free(cs->lookup); - PyObject_Del(cs); -} - -/* C APIs */ - -#define cs ((mxCharSetObject *)self) - -int mxCharSet_ContainsChar(PyObject *self, - register unsigned char ch) -{ - if (!mxCharSet_Check(self)) { - PyErr_BadInternalCall(); - goto onError; - } - - if (cs->mode == MXCHARSET_8BITMODE) { - unsigned char *bitmap = ((string_charset *)cs->lookup)->bitmap; - return ((bitmap[ch >> 3] & (1 << (ch & 7))) != 0); - } -#ifdef HAVE_UNICODE - else if (cs->mode == MXCHARSET_UCS2MODE) { - unicode_charset *lookup = (unicode_charset *)cs->lookup; - unsigned char *bitmap = lookup->bitmaps[lookup->bitmapindex[0]]; - return ((bitmap[ch >> 3] & (1 << (ch & 7))) != 0); - } -#endif - else { - Py_Error(mxTextTools_Error, - "unsupported character set mode"); - } - - onError: - return -1; -} - -#ifdef HAVE_UNICODE - -int mxCharSet_ContainsUnicodeChar(PyObject *self, - register Py_UNICODE ch) -{ - if (!mxCharSet_Check(self)) { - PyErr_BadInternalCall(); - goto onError; - } - - if (cs->mode == MXCHARSET_8BITMODE) { - unsigned char *bitmap = 
((string_charset *)cs->lookup)->bitmap; - if (ch >= 256) - return 0; - return ((bitmap[ch >> 3] & (1 << (ch & 7))) != 0); - } - else if (cs->mode == MXCHARSET_UCS2MODE) { - unicode_charset *lookup = (unicode_charset *)cs->lookup; - unsigned char *bitmap = lookup->bitmaps[lookup->bitmapindex[ch >> 8]]; - return ((bitmap[(ch >> 3) & 31] & (1 << (ch & 7))) != 0); - } - else { - Py_Error(mxTextTools_Error, - "unsupported character set mode"); - } - - onError: - return -1; -} - -#endif - -static -int mxCharSet_Contains(PyObject *self, - PyObject *other) -{ - if (PyString_Check(other)) { - Py_Assert(PyString_GET_SIZE(other) == 1, - PyExc_TypeError, - "expected a single character"); - return mxCharSet_ContainsChar(self, PyString_AS_STRING(other)[0]); - } -#ifdef HAVE_UNICODE - else if (PyUnicode_Check(other)) { - Py_Assert(PyUnicode_GET_SIZE(other) == 1, - PyExc_TypeError, - "expected a single unicode character"); - return mxCharSet_ContainsUnicodeChar(self, - PyUnicode_AS_UNICODE(other)[0]); - } -#endif - else - Py_Error(PyExc_TypeError, - "expected string or unicode character"); - - onError: - return -1; -} - -/* In mode 1, find the position of the first character in text - belonging to set. This may also be stop or start-1 in case no such - character is found during the search (depending on the direction). - - In mode 0, find the first character not in set. This may also be - stop or start-1 in case no such character is found during the - search (depending on the direction). - - The search is done in the slice start:stop. - - -2 is returned in case of an error. - -*/ - -static -int mxCharSet_FindChar(PyObject *self, - unsigned char *text, - int start, - int stop, - const int mode, - const int direction) -{ - register int i; - register unsigned int c; - register unsigned int block; - unsigned char *bitmap; - - if (!mxCharSet_Check(self)) { - PyErr_BadInternalCall(); - goto onError; - } - - if (cs->mode == MXCHARSET_8BITMODE) - bitmap = ((string_charset *)cs->lookup)->bitmap; -#ifdef HAVE_UNICODE - else if (cs->mode == MXCHARSET_UCS2MODE) { - unicode_charset *lookup = (unicode_charset *)cs->lookup; - bitmap = lookup->bitmaps[lookup->bitmapindex[0]]; - } -#endif - else { - Py_Error(mxTextTools_Error, - "unsupported character set mode"); - } - - if (direction > 0) { - if (mode) - /* Find first char in set */ - for (i = start; i < stop; i++) { - c = text[i]; - block = bitmap[c >> 3]; - if (block && ((block & (1 << (c & 7))) != 0)) - break; - } - else - /* Find first char not in set */ - for (i = start; i < stop; i++) { - c = text[i]; - block = bitmap[c >> 3]; - if (!block || ((block & (1 << (c & 7))) == 0)) - break; - } - } - else { - if (mode) - /* Find first char in set, searching from the end */ - for (i = stop - 1; i >= start; i--) { - c = text[i]; - block = bitmap[c >> 3]; - if (block && ((block & (1 << (c & 7))) != 0)) - break; - } - else - /* Find first char not in set, searching from the end */ - for (i = stop - 1; i >= start; i--) { - c = text[i]; - block = bitmap[c >> 3]; - if (!block || ((block & (1 << (c & 7))) == 0)) - break; - } - } - return i; - - onError: - return -2; -} - -#ifdef HAVE_UNICODE - -static -int mxCharSet_FindUnicodeChar(PyObject *self, - Py_UNICODE *text, - int start, - int stop, - const int mode, - const int direction) -{ - register int i; - register unsigned int c; - register unsigned int block; - unsigned char *bitmap; - - if (!mxCharSet_Check(self)) { - PyErr_BadInternalCall(); - goto onError; - } - - if (cs->mode == MXCHARSET_8BITMODE) { - bitmap = 
((string_charset *)cs->lookup)->bitmap; - if (direction > 0) { - if (mode) - /* Find first char in set */ - for (i = start; i < stop; i++) { - c = text[i]; - if (c > 256) - continue; - block = bitmap[c >> 3]; - if (block && ((block & (1 << (c & 7))) != 0)) - break; - } - else - /* Find first char not in set */ - for (i = start; i < stop; i++) { - c = text[i]; - if (c > 256) - break; - block = bitmap[c >> 3]; - if (!block || ((block & (1 << (c & 7))) == 0)) - break; - } - } - else { - if (mode) - /* Find first char in set, searching from the end */ - for (i = stop - 1; i >= start; i--) { - c = text[i]; - if (c > 256) - continue; - block = bitmap[c >> 3]; - if (block && ((block & (1 << (c & 7))) != 0)) - break; - } - else - /* Find first char not in set, searching from the end */ - for (i = stop - 1; i >= start; i--) { - c = text[i]; - if (c > 256) - break; - block = bitmap[c >> 3]; - if (!block || ((block & (1 << (c & 7))) == 0)) - break; - } - } - return i; - } - -#ifdef HAVE_UNICODE - else if (cs->mode == MXCHARSET_UCS2MODE) { - unicode_charset *lookup = (unicode_charset *)cs->lookup; - if (direction > 0) { - if (mode) - /* Find first char in set */ - for (i = start; i < stop; i++) { - c = text[i]; - bitmap = lookup->bitmaps[lookup->bitmapindex[c >> 8]]; - block = bitmap[(c >> 3) & 31]; - if (block && ((block & (1 << (c & 7))) != 0)) - break; - } - else - /* Find first char not in set */ - for (i = start; i < stop; i++) { - c = text[i]; - bitmap = lookup->bitmaps[lookup->bitmapindex[c >> 8]]; - block = bitmap[(c >> 3) & 31]; - if (!block || ((block & (1 << (c & 7))) == 0)) - break; - } - } - else { - if (mode) - /* Find first char in set, searching from the end */ - for (i = stop - 1; i >= start; i--) { - c = text[i]; - bitmap = lookup->bitmaps[lookup->bitmapindex[c >> 8]]; - block = bitmap[(c >> 3) & 31]; - if (block && ((block & (1 << (c & 7))) != 0)) - break; - } - else - /* Find first char not in set, searching from the end */ - for (i = stop - 1; i >= start; i--) { - c = text[i]; - bitmap = lookup->bitmaps[lookup->bitmapindex[c >> 8]]; - block = bitmap[(c >> 3) & 31]; - if (!block || ((block & (1 << (c & 7))) == 0)) - break; - } - } - return i; - } -#endif - else { - Py_Error(mxTextTools_Error, - "unsupported character set mode"); - } - - onError: - return -2; -} - -#endif - -/* Return the position of the first character in text[start:stop] - occurring in set or -1 in case no such character exists. - -*/ - -static -int mxCharSet_Search(PyObject *self, - PyObject *text, - int start, - int stop, - int direction) -{ - int position; - - if (PyString_Check(text)) { - Py_CheckStringSlice(text, start, stop); - position = mxCharSet_FindChar(self, - (unsigned char *)PyString_AS_STRING(text), - start, - stop, - 1, - direction); - } -#ifdef HAVE_UNICODE - else if (PyUnicode_Check(text)) { - Py_CheckUnicodeSlice(text, start, stop); - position = mxCharSet_FindUnicodeChar(self, - PyUnicode_AS_UNICODE(text), - start, - stop, - 1, - direction); - } -#endif - else - Py_Error(PyExc_TypeError, - "expected string or unicode"); - - if ((direction > 0 && position >= stop) || - (direction <= 0 && position < start)) - position = -1; - return position; - - onError: - return -2; -} - -/* Return the longest match of characters from set in - text[start:stop]. - - If direction is positive, the search is done from the left (longest - prefix), otherwise it is started from the right (longest suffix). - - -1 is returned in case of an error. 
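Editorial note: in terms of the Python-level CharSet object (whose methods follow below), these two helpers surface as .search() and .match(). A hypothetical session, with the expected values following directly from the comments above:

    from simpleparse.stt.TextTools import CharSet

    digits = CharSet('0-9')
    digits.search('abc123')       # -> 3: index of the first character in the set
    digits.match('123abc')        # -> 3: length of the longest prefix in the set
    digits.match('abc123', -1)    # -> 3: longest suffix when direction is negative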
- -*/ - -int mxCharSet_Match(PyObject *self, - PyObject *text, - int start, - int stop, - int direction) -{ - int position; - - if (PyString_Check(text)) { - Py_CheckStringSlice(text, start, stop); - position = mxCharSet_FindChar(self, - (unsigned char *)PyString_AS_STRING(text), - start, - stop, - 0, - direction); - } -#ifdef HAVE_UNICODE - else if (PyUnicode_Check(text)) { - Py_CheckUnicodeSlice(text, start, stop); - position = mxCharSet_FindUnicodeChar(self, - PyUnicode_AS_UNICODE(text), - start, - stop, - 0, - direction); - } -#endif - else - Py_Error(PyExc_TypeError, - "expected string or unicode"); - - if (position < -1) - goto onError; - if (direction > 0) - return position - start; - else - return stop-1 - position; - - onError: - return -1; -} - -/* Stips off characters appearing in the character set from text[start:stop] - and returns the result as Python string object. - - where indicates the mode: - where < 0: strip left only - where = 0: strip left and right - where > 0: strip right only - -*/ -static -PyObject *mxCharSet_Strip(PyObject *self, - PyObject *text, - int start, - int stop, - int where) -{ - int left,right; - - if (!mxCharSet_Check(self)) { - PyErr_BadInternalCall(); - goto onError; - } - - if (PyString_Check(text)) { - Py_CheckStringSlice(text, start, stop); - - /* Strip left */ - if (where <= 0) { - left = mxCharSet_FindChar(self, - (unsigned char *)PyString_AS_STRING(text), - start, - stop, - 0, - 1); - if (left < 0) - goto onError; - } - else - left = start; - - /* Strip right */ - if (where >= 0) { - right = mxCharSet_FindChar(self, - (unsigned char *)PyString_AS_STRING(text), - left, - stop, - 0, - -1) + 1; - if (right < 0) - goto onError; - } - else - right = stop; - - return PyString_FromStringAndSize(PyString_AS_STRING(text) + left, - max(right - left, 0)); - } -#ifdef HAVE_UNICODE - else if (PyUnicode_Check(text)) { - Py_CheckUnicodeSlice(text, start, stop); - - /* Strip left */ - if (where <= 0) { - left = mxCharSet_FindUnicodeChar(self, - PyUnicode_AS_UNICODE(text), - start, - stop, - 0, - 1); - if (left < 0) - goto onError; - } - else - left = start; - - /* Strip right */ - if (where >= 0) { - right = mxCharSet_FindUnicodeChar(self, - PyUnicode_AS_UNICODE(text), - start, - stop, - 0, - -1) + 1; - if (right < 0) - goto onError; - } - else - right = stop; - - return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(text) + left, - max(right - left, 0)); - } -#endif - else - Py_Error(PyExc_TypeError, - "expected string or unicode"); - - onError: - return NULL; -} - -static -PyObject *mxCharSet_Split(PyObject *self, - PyObject *text, - int start, - int text_len, - int include_splits) -{ - PyObject *list = NULL; - PyObject *s; - register int x; - int listitem = 0; - int listsize = INITIAL_LIST_SIZE; - - if (!mxCharSet_Check(self)) { - PyErr_BadInternalCall(); - goto onError; - } - - list = PyList_New(listsize); - if (!list) - goto onError; - - if (PyString_Check(text)) { - unsigned char *tx = (unsigned char *)PyString_AS_STRING(text); - - Py_CheckStringSlice(text, start, text_len); - - x = start; - while (x < text_len) { - int z; - - /* Skip all text in set (include_splits == 0), not in set - (include_splits == 1) */ - z = x; - x = mxCharSet_FindChar(self, tx, x, text_len, include_splits, 1); - - /* Append the slice to list */ - if (include_splits) { - s = PyString_FromStringAndSize((char *)&tx[z], x - z); - if (!s) - goto onError; - if (listitem < listsize) - PyList_SET_ITEM(list,listitem,s); - else { - PyList_Append(list,s); - Py_DECREF(s); - } - listitem++; - - 
if (x >= text_len) - break; - } - - /* Skip all text in set (include_splits == 1), not in set - (include_splits == 0) */ - z = x; - x = mxCharSet_FindChar(self, tx, x, text_len, !include_splits, 1); - - /* Append the slice to list if it is not empty */ - if (x > z) { - s = PyString_FromStringAndSize((char *)&tx[z], x - z); - if (!s) - goto onError; - if (listitem < listsize) - PyList_SET_ITEM(list,listitem,s); - else { - PyList_Append(list,s); - Py_DECREF(s); - } - listitem++; - } - } - - } -#ifdef HAVE_UNICODE - else if (PyUnicode_Check(text)) { - Py_UNICODE *tx = PyUnicode_AS_UNICODE(text); - - Py_CheckUnicodeSlice(text, start, text_len); - - x = start; - while (x < text_len) { - int z; - - /* Skip all text in set (include_splits == 0), not in set - (include_splits == 1) */ - z = x; - x = mxCharSet_FindUnicodeChar(self, tx, x, text_len, include_splits, 1); - - /* Append the slice to list */ - if (include_splits) { - s = PyUnicode_FromUnicode(&tx[z], x - z); - if (!s) - goto onError; - if (listitem < listsize) - PyList_SET_ITEM(list,listitem,s); - else { - PyList_Append(list,s); - Py_DECREF(s); - } - listitem++; - - if (x >= text_len) - break; - } - - /* Skip all text in set (include_splits == 1), not in set - (include_splits == 0) */ - z = x; - x = mxCharSet_FindUnicodeChar(self, tx, x, text_len, !include_splits, 1); - - /* Append the slice to list if it is not empty */ - if (x > z) { - s = PyUnicode_FromUnicode(&tx[z], x - z); - if (!s) - goto onError; - if (listitem < listsize) - PyList_SET_ITEM(list,listitem,s); - else { - PyList_Append(list,s); - Py_DECREF(s); - } - listitem++; - } - } - } -#endif - else - Py_Error(PyExc_TypeError, - "expected string or unicode"); - - /* Resize list if necessary */ - if (listitem < listsize) - PyList_SetSlice(list, listitem, listsize, (PyObject*)NULL); - - return list; - - onError: - Py_XDECREF(list); - return NULL; -} - -/* methods */ - -Py_C_Function( mxCharSet_contains, - ".contains(char)\n\n" - ) -{ - PyObject *chr; - int rc; - - Py_GetArg("O:CharSet.contains", chr); - - rc = mxCharSet_Contains(self, chr); - if (rc < 0) - goto onError; - return PyInt_FromLong(rc); - - onError: - return NULL; -} - -Py_C_Function( mxCharSet_search, - ".search(text[, direction=1, start=0, stop=len(text)])\n\n" - ) -{ - PyObject *text; - int direction = 1; - int start = 0, stop = INT_MAX; - int rc; - - Py_Get4Args("O|iii:CharSet.search", text, direction, start, stop); - - rc = mxCharSet_Search(self, text, start, stop, direction); - if (rc == -1) - Py_ReturnNone(); - if (rc < -1) - goto onError; - return PyInt_FromLong(rc); - - onError: - return NULL; -} - -Py_C_Function( mxCharSet_match, - ".match(text[, direction=1, start=0, stop=len(text)])\n\n" - ) -{ - PyObject *text; - int direction = 1; - int start = 0, stop = INT_MAX; - int rc; - - Py_Get4Args("O|iii:CharSet.match", text, direction, start, stop); - - rc = mxCharSet_Match(self, text, start, stop, direction); - if (rc < 0) - goto onError; - return PyInt_FromLong(rc); - - onError: - return NULL; -} - -Py_C_Function( mxCharSet_split, - ".split(text[, start=0, stop=len(text)])\n\n" - ) -{ - PyObject *text; - int start = 0, stop = INT_MAX; - - Py_Get3Args("O|ii:CharSet.split", text, start, stop); - - return mxCharSet_Split(self, text, start, stop, 0); - - onError: - return NULL; -} - -Py_C_Function( mxCharSet_splitx, - ".splitx(text[, start=0, stop=len(text)])\n\n" - ) -{ - PyObject *text; - int start = 0, stop = INT_MAX; - - Py_Get3Args("O|ii:CharSet.splitx", text, start, stop); - - return mxCharSet_Split(self, 
text, start, stop, 1); - - onError: - return NULL; -} - -Py_C_Function( mxCharSet_strip, - ".strip(text[, where=0, start=0, stop=len(text)])\n\n" - ) -{ - PyObject *text; - int where = 0; - int start = 0, stop = INT_MAX; - - Py_Get4Args("O|iii:CharSet.strip", text, where, start, stop); - - return mxCharSet_Strip(self, text, start, stop, where); - - onError: - return NULL; -} - -#ifdef COPY_PROTOCOL -Py_C_Function( mxCharSet_copy, - "copy([memo])\n\n" - "Return a new reference for the instance. This function\n" - "is used for the copy-protocol. Real copying doesn't take\n" - "place, since the instances are immutable.") -{ - PyObject *memo; - - Py_GetArg("|O",memo); - Py_INCREF(cs); - return (PyObject *)cs; - onError: - return NULL; -} -#endif - -#undef cs - -/* --- slots --- */ - -static -PyObject *mxCharSet_Repr(mxCharSetObject *self) -{ - PyObject *v; - char t[500], *reprstr; - - v = PyObject_Repr(self->definition); - if (v == NULL) - return NULL; - reprstr = PyString_AsString(v); - if (reprstr == NULL) - return NULL; - sprintf(t, "", - reprstr, (long)self); - Py_DECREF(v); - return PyString_FromString(t); -} - -static -PyObject *mxCharSet_GetAttr(mxCharSetObject *self, - char *name) -{ - PyObject *v; - - if (Py_WantAttr(name,"definition")) { - v = self->definition; - Py_INCREF(v); - return v; - } - - else if (Py_WantAttr(name,"__members__")) - return Py_BuildValue("[s]", - "definition"); - - return Py_FindMethod(mxCharSet_Methods, (PyObject *)self, (char *)name); -} - -/* Python Type Tables */ - -static -PySequenceMethods mxCharSet_TypeAsSequence = { - (inquiry)0, /*sq_length*/ - (binaryfunc)0, /*sq_concat*/ - (intargfunc)0, /*sq_repeat*/ - (intargfunc)0, /*sq_item*/ - (intintargfunc)0, /*sq_slice*/ - (intobjargproc)0, /*sq_ass_item*/ - (intintobjargproc)0, /*sq_ass_slice*/ -#if PY_VERSION_HEX >= 0x02000000 - (objobjproc)mxCharSet_Contains, /*sq_contains*/ -#endif -}; - -PyTypeObject mxCharSet_Type = { - PyObject_HEAD_INIT(0) /* init at startup ! 
*/ - 0, /* ob_size */ - "Character Set", /* tp_name */ - sizeof(mxCharSetObject), /* tp_basicsize */ - 0, /* tp_itemsize */ - /* methods */ - (destructor)mxCharSet_Free, /* tp_dealloc */ - (printfunc)0, /* tp_print */ - (getattrfunc)mxCharSet_GetAttr, /* tp_getattr */ - (setattrfunc)0, /* tp_setattr */ - (cmpfunc)0, /* tp_compare */ - (reprfunc)mxCharSet_Repr, /* tp_repr */ - 0, /* tp_as_number */ - &mxCharSet_TypeAsSequence, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - (hashfunc)0, /* tp_hash */ - (ternaryfunc)0, /* tp_call */ - (reprfunc)0, /* tp_str */ - (getattrofunc)0, /* tp_getattro */ - (setattrofunc)0, /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT, /* tp_flags */ - (char*) 0, /* tp_doc */ -}; - -/* Python Method Table */ - -statichere -PyMethodDef mxCharSet_Methods[] = -{ - Py_MethodListEntry("contains",mxCharSet_contains), - Py_MethodListEntry("search",mxCharSet_search), - Py_MethodListEntry("match",mxCharSet_match), - Py_MethodListEntry("strip",mxCharSet_strip), - Py_MethodListEntry("split",mxCharSet_split), - Py_MethodListEntry("splitx",mxCharSet_splitx), -#ifdef COPY_PROTOCOL - Py_MethodListEntry("__deepcopy__",mxCharSet_copy), - Py_MethodListEntry("__copy__",mxCharSet_copy), -#endif - {NULL,NULL} /* end of list */ -}; - -/* --- Tag Table Object ------------------------------------------------*/ - -staticforward PyMethodDef mxTagTable_Methods[]; - -PyObject *mxTagTable_New(PyObject *definition, - int tabletype, - int cacheable); - -/* internal APIs */ - -static -PyObject *tc_get_item(register PyObject *obj, - register int i) -{ - if (PyTuple_Check(obj)) { - if (i > PyTuple_GET_SIZE(obj)) - return NULL; - return PyTuple_GET_ITEM(obj, i); - } - else if (PyList_Check(obj)) { - if (i > PyList_GET_SIZE(obj)) - return NULL; - return PyList_GET_ITEM(obj, i); - } - else - return NULL; -} - -static -int tc_length(register PyObject *obj) -{ - if (obj == NULL) - return -1; - else if (PyTuple_Check(obj)) - return PyTuple_GET_SIZE(obj); - else if (PyList_Check(obj)) - return PyList_GET_SIZE(obj); - else - return -1; -} - -/* Add a jump target to the jump dictionary */ - -static -int tc_add_jumptarget(PyObject *jumpdict, - PyObject *targetname, - int index) -{ - PyObject *v; - - v = PyDict_GetItem(jumpdict, targetname); - if (v != NULL) - Py_ErrorWithArg(PyExc_TypeError, - "tag table entry %i: " - "jump target already defined", index); - v = PyInt_FromLong(index); - if (v == NULL) - goto onError; - if (PyDict_SetItem(jumpdict, targetname, v)) - goto onError; - Py_DECREF(v); - return 0; - - onError: - return -1; -} - -/* Convert a string command argument to either an 8-bit string or - Unicode depending on the tabletype. 
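Editorial note: the helper introduced by this comment enforces a simple rule: text arguments are coerced to match the table's type, 8-bit strings for string-type tables and Unicode for unicode-type tables. A Python-2-flavoured sketch of the same rule, with constants and error handling omitted:

    def convert_string_arg(arg, tabletype):
        # tabletype: 'string' or 'unicode' here; the C code uses the
        # MXTAGTABLE_STRINGTYPE / MXTAGTABLE_UNICODETYPE constants.
        if tabletype == 'string':
            return arg.encode() if isinstance(arg, unicode) else arg
        return arg.decode() if isinstance(arg, str) else arg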
*/ - -static -PyObject *tc_convert_string_arg(PyObject *arg, - int tableposition, - int tabletype) -{ - /* Convert to strings */ - if (tabletype == MXTAGTABLE_STRINGTYPE) { - if (PyString_Check(arg)) - return arg; -#ifdef HAVE_UNICODE - else if (PyUnicode_Check(arg)) { - Py_DECREF(arg); - arg = PyUnicode_AsEncodedString(arg, - NULL, - NULL); - if (arg == NULL) - Py_ErrorWithArg(PyExc_TypeError, - "tag table entry %i: " - "conversion from Unicode to " - "string failed", tableposition); - } -#endif - else - Py_ErrorWithArg(PyExc_TypeError, - "tag table entry %i: " - "command argument must be a " - "string or unicode", tableposition); - } - -#ifdef HAVE_UNICODE - /* Convert to Unicode */ - else if (tabletype == MXTAGTABLE_UNICODETYPE) { - if (PyUnicode_Check(arg)) - return arg; - else if (PyString_Check(arg)) { - Py_DECREF(arg); - arg = PyUnicode_Decode(PyString_AS_STRING(arg), - PyString_GET_SIZE(arg), - NULL, - NULL); - if (arg == NULL) - Py_ErrorWithArg(PyExc_TypeError, - "tag table entry %i: " - "conversion from string to " - "Unicode failed", tableposition); - } - else - Py_ErrorWithArg(PyExc_TypeError, - "tag table entry %i: " - "command argument must be a " - "string or unicode", tableposition); - } -#endif - - else - Py_Error(mxTextTools_Error, - "unsupported table type"); - - return arg; - - onError: - return NULL; -} - -/* Cleanup any references in the tag table. */ - -static -int tc_cleanup(mxTagTableObject *tagtable) -{ - int i; - for (i = 0; i < tagtable->ob_size; i++) { - mxTagTableEntry *tagtableentry = &tagtable->entry[i]; - - Py_XDECREF(tagtableentry->tagobj); - tagtableentry->tagobj = NULL; - Py_XDECREF(tagtableentry->args); - tagtableentry->args = NULL; - } - return 0; -} - -/* Initialize the tag table (this is the actual Tag Table compiler) */ - -static -int init_tag_table(mxTagTableObject *tagtable, - PyObject *table, - int size, - int tabletype, - int cacheable) -{ - int i; - PyObject *entry; - int entry_len; - PyObject *tagobj, *command, *args = 0, *je, *jne; - PyObject *jumpdict, *v; - int secondpass, own_args = 0; - - jumpdict = PyDict_New(); - if (jumpdict == NULL) - return -1; - - /* Reset to all fields to 0 */ - memset(&tagtable->entry[0], 0, size * sizeof(mxTagTableEntry)); - - /* First pass */ - secondpass = 0; - for (i = 0; i < size; i++) { - mxTagTableEntry *tagtableentry = &tagtable->entry[i]; - - /* Get table entry i and parse it */ - entry = tc_get_item(table, i); - if (entry == NULL) { - Py_ErrorWithArg(PyExc_TypeError, - "tag table entry %i: " - "not found or not a supported entry type", i); - } - - /* Special handling for jump marks (args is set to the jump - mark string, jump target index is the next table entry) */ - if (PyString_Check(entry)) { - if (tc_add_jumptarget(jumpdict, entry, i + 1)) - goto onError; - tagtableentry->tagobj = NULL; - tagtableentry->cmd = MATCH_JUMPTARGET; - tagtableentry->flags = 0; - Py_INCREF(entry); - tagtableentry->args = entry; - tagtableentry->jne = 0; - tagtableentry->je = 1; - continue; - } - - /* Get entry length */ - entry_len = tc_length(entry); - if (entry_len < 3) { - Py_ErrorWithArg(PyExc_TypeError, - "tag table entry %i: " - "expected an entry of the form " - "(tagobj,command,arg[,jne[,je]])", i); - } - - /* Decode entry parts: (tagobj, command, args[, jne[, je]]) */ - tagobj = tc_get_item(entry, 0); - command = tc_get_item(entry, 1); - args = tc_get_item(entry, 2); - if (entry_len >= 4) - jne = tc_get_item(entry, 3); - else - jne = NULL; - if (entry_len >= 5) - je = tc_get_item(entry, 4); - else - je = NULL; - - 
if (tagobj == NULL || - command == NULL || - args == NULL || - (entry_len >= 4 && jne == NULL) || - (entry_len >= 5 && je == NULL)) { - Py_ErrorWithArg(PyExc_TypeError, - "tag table entry %i: " - "expected an entry of the form " - "(tagobj,command,arg[,jne[,je]])", i); - } - - /* Store tagobj, None gets converted to NULL */ - if (tagobj != Py_None) - Py_INCREF(tagobj); - else - tagobj = NULL; - tagtableentry->tagobj = tagobj; - - /* Decode command and flags */ - Py_AssertWithArg(PyInt_Check(command), - PyExc_TypeError, - "tag table entry %i: " - "command must be an integer",i); - tagtableentry->cmd = PyInt_AS_LONG(command) & 0xFF; - tagtableentry->flags = PyInt_AS_LONG(command) - tagtableentry->cmd; - - /* Check command arguments */ - Py_INCREF(args); - own_args = 1; - - switch (tagtableentry->cmd) { - - case MATCH_JUMP: /* == MATCH_FAIL */ - case MATCH_EOF: - case MATCH_LOOP: - /* args is ignored */ - break; - - case MATCH_SKIP: - case MATCH_MOVE: - case MATCH_LOOPCONTROL: - Py_AssertWithArg(PyInt_Check(args), - PyExc_TypeError, - "tag table entry %i: " - "Skip|Move|LoopControl command argument " - "must be an integer", i); - break; - - case MATCH_JUMPTARGET: - Py_AssertWithArg(PyString_Check(args), - PyExc_TypeError, - "tag table entry %i: " - "JumpMark command argument must be a string",i); - if (tc_add_jumptarget(jumpdict, args, i + 1)) - goto onError; - break; - - case MATCH_ALLIN: - case MATCH_ALLNOTIN: - case MATCH_IS: - case MATCH_ISIN: - case MATCH_ISNOTIN: - case MATCH_WORD: - case MATCH_WORDSTART: - case MATCH_WORDEND: - args = tc_convert_string_arg(args, i, tabletype); - if (args == NULL) - goto onError; - break; - - case MATCH_ALLINSET: - case MATCH_ISINSET: - Py_AssertWithArg(PyString_Check(args) && - PyString_GET_SIZE(args) == 32, - PyExc_TypeError, - "tag table entry %i: " - "AllInSet|IsInSet command argument must " - "be a set() string",i); - break; - - case MATCH_ALLINCHARSET: - case MATCH_ISINCHARSET: - Py_AssertWithArg(mxCharSet_Check(args), - PyExc_TypeError, - "tag table entry %i: " - "AllInCharSet|IsInCharSet command argument must " - "be a CharSet instance",i); - break; - - case MATCH_SWORDSTART: /* == MATCH_NOWORD */ - case MATCH_SWORDEND: - case MATCH_SFINDWORD: - Py_AssertWithArg(mxTextSearch_Check(args), - PyExc_TypeError, - "tag table entry %i: " - "sWordStart|sWordEnd|sFindWord command " - "argument must be a TextSearch search " - "object",i); - break; - - case MATCH_TABLE: - case MATCH_SUBTABLE: - Py_AssertWithArg(mxTagTable_Check(args) || - PyTuple_Check(args) || - PyList_Check(args) || - (PyInt_Check(args) && - PyInt_AS_LONG(args) == MATCH_THISTABLE), - PyExc_TypeError, - "tag table entry %i: " - "Table|SubTable command argument " - "must be a tag table tuple/object or " - "ThisTable", i); - /* XXX We shouldn't recursively compile tag table tuples here - because this will slow down the compile process - too much and it's not clear whether this particular - table will ever be used during tagging. 
- */ - if (!mxTagTable_Check(args) && !PyInt_Check(args)) { - Py_DECREF(args); - args = mxTagTable_New(args, tabletype, cacheable); - if (args == NULL) - goto onError; - } - break; - - case MATCH_TABLEINLIST: - case MATCH_SUBTABLEINLIST: - Py_AssertWithArg(PyTuple_Check(args) && - PyTuple_GET_SIZE(args) == 2 && - PyList_Check(PyTuple_GET_ITEM(args, 0)) && - PyInt_Check(PyTuple_GET_ITEM(args, 1)), - PyExc_TypeError, - "tag table entry %i: " - "TableInList|SubTableInList command argument " - "must be a 2-tuple (list, integer)", - i); - break; - - case MATCH_CALL: - Py_AssertWithArg(PyCallable_Check(args), - PyExc_TypeError, - "tag table entry %i: " - "Call command argument " - "must be a callable object", - i); - break; - - case MATCH_CALLARG: - Py_AssertWithArg(PyTuple_Check(args) && - PyTuple_GET_SIZE(args) > 0 && - PyCallable_Check(PyTuple_GET_ITEM(args, 0)), - PyExc_TypeError, - "tag table entry %i: " - "CallArg command argument " - "must be a tuple (fct,[arg0,arg1,...])", - i); - break; - - default: - Py_ErrorWith2Args(PyExc_TypeError, - "tag table entry %i: " - "unknown command integer: %i", - i, tagtableentry->cmd); - - } - - /* Store command args */ - tagtableentry->args = args; - own_args = 0; - - /* Decode jump offsets */ - if (jne) { - if (PyInt_Check(jne)) - tagtableentry->jne = PyInt_AS_LONG(jne); - else if (PyString_Check(jne)) { - /* Mark for back-patching */ - tagtableentry->jne = -424242; - secondpass = 1; - } - else - Py_ErrorWithArg(PyExc_TypeError, - "tag table entry %i: " - "jne must be an integer or string", i); - } - else - tagtableentry->jne = 0; - - if (je) { - if (PyInt_Check(je)) - tagtableentry->je = PyInt_AS_LONG(je); - else if (PyString_Check(je)) { - /* Mark for back-patching */ - tagtableentry->je = -424242; - secondpass = 1; - } - else - Py_ErrorWithArg(PyExc_TypeError, - "tag table entry %i: " - "je must be an integer or string", i); - } - else - tagtableentry->je = 1; - } - - /* Second pass (needed to patch string jump targets) */ - if (secondpass) - for (i = 0; i < size; i++) { - mxTagTableEntry *tagtableentry = &tagtable->entry[i]; - - if (tagtableentry->je != -424242 && - tagtableentry->jne != -424242) - continue; - - /* Entry (most probably) needs back-patching */ - entry = tc_get_item(table, i); - if (entry == NULL) { - Py_ErrorWithArg(PyExc_TypeError, - "tag table entry %i: " - "unexpected error (not found)", i); - } - - /* Get entry length */ - entry_len = tc_length(entry); - if (entry_len < 0) { - Py_ErrorWithArg(PyExc_TypeError, - "tag table entry %i: " - "unexpected error (no length)", i); - } - - /* Decode jump offsets */ - if (entry_len >= 4) - jne = tc_get_item(entry, 3); - else - jne = NULL; - if (entry_len >= 5) - je = tc_get_item(entry, 4); - else - je = NULL; - - /* Patch jump offsets */ - if (jne && PyString_Check(jne)) { - v = PyDict_GetItem(jumpdict, jne); - if (v == NULL || !PyInt_Check(v)) - Py_ErrorWith2Args(PyExc_TypeError, - "tag table entry %i: " - "jne jump target '%s' not found", - i, PyString_AS_STRING(jne)); - tagtableentry->jne = PyInt_AS_LONG(v) - i; - } - if (je && PyString_Check(je)) { - v = PyDict_GetItem(jumpdict, je); - if (v == NULL || !PyInt_Check(v)) - Py_ErrorWith2Args(PyExc_TypeError, - "tag table entry %i: " - "je jump target '%s' not found", - i, PyString_AS_STRING(je)); - tagtableentry->je = PyInt_AS_LONG(v) - i; - } - } - - Py_DECREF(jumpdict); - return 0; - - onError: - if (own_args) { - Py_DECREF(args); - } - return -1; -} - -/* Check the cache for an already compiled TagTable for this - definition. 
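Note: the two compiler passes above fix the accepted table format: each entry is a tuple (tagobj, command, arg[, jne[, je]]), a bare string entry is compiled into a named jump target, jne/je are offsets counted in table entries, and string jne/je values are back-patched to the matching named target in the second pass. A minimal sketch of a table that exercises the named-target back-patching, assuming tag() and the command constants are re-exported by simpleparse.stt.TextTools.TextTools (the module init code further down registers them via Constants.TagTables):

    from simpleparse.stt.TextTools.TextTools import *   # tag() plus the table constants (assumed re-export)

    letters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    digits = '0123456789'

    table = (
        'next',                                # bare string: becomes a MATCH_JUMPTARGET entry
        ('word', AllIn, letters, +1, +1),      # tag a run of letters, fall through either way
        ('number', AllIn, digits, +1, +1),     # tag a run of digits
        (None, EOF, Here, +1, MatchOk),        # succeed once the end of the slice is reached
        (None, Skip, 1, MatchFail, 'next'),    # otherwise skip one char; the string target is
    )                                          # back-patched to a relative jump by the compiler

    success, taglist, next_pos = tag('abc 123 def', table)
    # success == 1; taglist holds ('word', l, r, ...) and ('number', l, r, ...) entries per run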
Return NULL in case of an error, Py_None without - INCREF in case no such table was found or the TagTable object. */ - -static -PyObject *consult_tagtable_cache(PyObject *definition, - int tabletype, - int cacheable) -{ - PyObject *v, *key, *tt; - - if (!PyTuple_Check(definition) || !cacheable) - return Py_None; - - key = PyTuple_New(2); - if (key == NULL) - goto onError; - v = PyInt_FromLong((long) definition); - if (v == NULL) - goto onError; - PyTuple_SET_ITEM(key, 0, v); - v = PyInt_FromLong(tabletype); - if (v == NULL) - goto onError; - PyTuple_SET_ITEM(key, 1, v); - tt = PyDict_GetItem(mxTextTools_TagTables, key); - Py_DECREF(key); - if (tt != NULL) { - Py_INCREF(tt); - return tt; - } - return Py_None; - - onError: - return NULL; -} - -/* Adds the compiled tagtable to the cache. Returns -1 in case of an - error, 0 on success. */ - -static -int add_to_tagtable_cache(PyObject *definition, - int tabletype, - int cacheable, - PyObject *tagtable) -{ - PyObject *v, *key; - int rc; - - if (!PyTuple_Check(definition) || !cacheable) - return 0; - - key = PyTuple_New(2); - if (key == NULL) - goto onError; - v = PyInt_FromLong((long) definition); - if (v == NULL) - goto onError; - PyTuple_SET_ITEM(key, 0, v); - v = PyInt_FromLong(tabletype); - if (v == NULL) - goto onError; - PyTuple_SET_ITEM(key, 1, v); - - /* Hard-limit the cache size */ - if (PyDict_Size(mxTextTools_TagTables) >= MAX_TAGTABLES_CACHE_SIZE) - PyDict_Clear(mxTextTools_TagTables); - - rc = PyDict_SetItem(mxTextTools_TagTables, key, tagtable); - Py_DECREF(key); - if (rc) - goto onError; - return 0; - - onError: - return -1; -} - - -/* allocation */ - -PyObject *mxTagTable_New(PyObject *definition, - int tabletype, - int cacheable) -{ - mxTagTableObject *tagtable = 0; - PyObject *v; - int size; - - /* First, consult the TagTable cache */ - v = consult_tagtable_cache(definition, tabletype, cacheable); - if (v == NULL) - goto onError; - else if (v != Py_None) - return v; - - size = tc_length(definition); - if (size < 0) - Py_Error(PyExc_TypeError, - "tag table definition must be a tuple or a list"); - - tagtable = PyObject_NEW_VAR(mxTagTableObject, &mxTagTable_Type, size); - if (tagtable == NULL) - goto onError; - if (cacheable) { - Py_INCREF(definition); - tagtable->definition = definition; - } - else - tagtable->definition = NULL; - tagtable->tabletype = tabletype; - - /* Compile table ... 
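Note: the cache consulted above is keyed on the address of the definition tuple plus the table type, so a compiled table is only reused when the very same tuple object is passed again; list definitions and tables created with cacheable=0 are always recompiled, and the whole cache is flushed once it reaches its hard size limit. A small sketch of the practical consequence, assuming tag() and TagTable() are exported by the TextTools wrapper module:

    from simpleparse.stt.TextTools.TextTools import *

    TABLE_DEF = (                        # module-level tuple: same object on every call
        ('digits', AllIn, '0123456789'),
    )

    # Repeated tag() calls with the *same* tuple object reuse the cached compilation;
    # an equal-but-distinct tuple, a list definition, or cacheable=0 recompiles each time.
    for text in ('123', '42abc'):
        tag(text, TABLE_DEF)

    # Precompiling skips even the cache lookup and makes the reuse explicit.
    precompiled = TagTable(TABLE_DEF)
    tag('123', precompiled)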
*/ - if (init_tag_table(tagtable, definition, size, tabletype, cacheable)) - goto onError; - - /* Cache the compiled table if it is cacheable and derived from a - tuple */ - if (add_to_tagtable_cache(definition, tabletype, cacheable, - (PyObject *)tagtable)) - goto onError; - - return (PyObject *)tagtable; - - onError: - Py_XDECREF(tagtable); - return NULL; -} - -Py_C_Function( mxTagTable_TagTable, - "TagTable(definition[,cachable=1])\n\n" - ) -{ - PyObject *definition; - int cacheable = 1; - - Py_Get2Args("O|i:TagTable", definition, cacheable); - return mxTagTable_New(definition, 0, cacheable); - - onError: - return NULL; -} - -#ifdef HAVE_UNICODE -Py_C_Function( mxTagTable_UnicodeTagTable, - "TagTable(definition[,cachable=1])\n\n" - ) -{ - PyObject *definition; - int cacheable = 1; - - Py_Get2Args("O|i:UnicodeTagTable", definition, cacheable); - return mxTagTable_New(definition, 1, cacheable); - - onError: - return NULL; -} -#endif - -static -void mxTagTable_Free(mxTagTableObject *tagtable) -{ - tc_cleanup(tagtable); - Py_XDECREF(tagtable->definition); - PyObject_Del(tagtable); -} - -/* C APIs */ - -#define tagtable ((mxTagTableObject *)self) - -static -PyObject *mxTagTable_CompiledDefinition(PyObject *self) -{ - PyObject *tuple = 0, *v, *w; - int i; - int size; - - if (!mxTagTable_Check(self)) { - PyErr_BadInternalCall(); - goto onError; - } - - size = tagtable->ob_size; - tuple = PyTuple_New(size); - if (tuple == NULL) - goto onError; - - for (i = 0; i < size; i++) { - mxTagTableEntry *tagtableentry = &tagtable->entry[i]; - - /* Build tuple (tagobj, command, args, jne, je) */ - v = PyTuple_New(5); - if (v == NULL) - goto onError; - w = tagtableentry->tagobj; - if (w == NULL) - w = Py_None; - Py_INCREF(w); - PyTuple_SET_ITEM(v, 0, w); - PyTuple_SET_ITEM(v, 1, PyInt_FromLong(tagtableentry->cmd | - tagtableentry->flags)); - w = tagtableentry->args; - if (w == NULL) - w = Py_None; - Py_INCREF(w); - PyTuple_SET_ITEM(v, 2, w); - PyTuple_SET_ITEM(v, 3, PyInt_FromLong(tagtableentry->jne)); - PyTuple_SET_ITEM(v, 4, PyInt_FromLong(tagtableentry->je)); - if (PyErr_Occurred()) { - Py_DECREF(v); - goto onError; - } - PyTuple_SET_ITEM(tuple, i, v); - } - - return tuple; - - onError: - Py_XDECREF(tuple); - return NULL; -} - - -/* methods */ - -Py_C_Function( mxTagTable_compiled, - ".compiled()\n\n" - ) -{ - Py_NoArgsCheck(); - return mxTagTable_CompiledDefinition(self); - - onError: - return NULL; -} - -#ifdef COPY_PROTOCOL -Py_C_Function( mxTagTable_copy, - "copy([memo])\n\n" - "Return a new reference for the instance. This function\n" - "is used for the copy-protocol. 
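Note: mxTagTable_CompiledDefinition() above is what the compiled() method returns: one (tagobj, command|flags, args, jne, je) tuple per compiled entry, with None in the tagobj slot where the definition used None and with string jump targets already resolved to relative offsets. A short sketch of peeking at the compiler output, assuming TagTable() and the command constants are re-exported as above:

    from simpleparse.stt.TextTools.TextTools import *

    tt = TagTable((
        ('digits', AllIn, '0123456789', MatchFail, MatchOk),
    ))
    entries = tt.compiled()
    # entries is a tuple of 5-tuples mirroring the compiled mxTagTableEntry structs,
    # e.g. entries[0][0] == 'digits' and entries[0][2] == '0123456789'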
Real copying doesn't take\n" - "place, since the instances are immutable.") -{ - PyObject *memo; - - Py_GetArg("|O",memo); - Py_INCREF(tagtable); - return (PyObject *)tagtable; - - onError: - return NULL; -} -#endif - -#undef tagtable - -/* --- slots --- */ - -static -PyObject *mxTagTable_Repr(mxTagTableObject *self) -{ - char t[100]; - - if (self->tabletype == MXTAGTABLE_STRINGTYPE) - sprintf(t,"", (long)self); - else if (self->tabletype == MXTAGTABLE_UNICODETYPE) - sprintf(t,"", (long)self); - else - sprintf(t,"", (long)self); - return PyString_FromString(t); -} - -static -PyObject *mxTagTable_GetAttr(mxTagTableObject *self, - char *name) -{ - PyObject *v; - - if (Py_WantAttr(name,"definition")) { - v = self->definition; - if (v == NULL) - v = Py_None; - Py_INCREF(v); - return v; - } - else if (Py_WantAttr(name,"__members__")) - return Py_BuildValue("[s]", - "definition"); - - return Py_FindMethod(mxTagTable_Methods, (PyObject *)self, (char *)name); -} - -/* Python Type Tables */ - -PyTypeObject mxTagTable_Type = { - PyObject_HEAD_INIT(0) /* init at startup ! */ - 0, /* ob_size */ - "Tag Table", /* tp_name */ - sizeof(mxTagTableObject), /* tp_basicsize */ - sizeof(mxTagTableEntry), /* tp_itemsize */ - /* methods */ - (destructor)mxTagTable_Free, /* tp_dealloc */ - (printfunc)0, /* tp_print */ - (getattrfunc)mxTagTable_GetAttr, /* tp_getattr */ - (setattrfunc)0, /* tp_setattr */ - (cmpfunc)0, /* tp_compare */ - (reprfunc)mxTagTable_Repr, /* tp_repr */ - 0, /* tp_as_number */ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - (hashfunc)0, /* tp_hash */ - (ternaryfunc)0, /* tp_call */ - (reprfunc)0, /* tp_str */ - (getattrofunc)0, /* tp_getattro */ - (setattrofunc)0, /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT, /* tp_flags */ - (char*) 0, /* tp_doc */ -}; - -/* Python Method Table */ - -statichere -PyMethodDef mxTagTable_Methods[] = -{ - Py_MethodListEntryNoArgs("compiled",mxTagTable_compiled), -#ifdef COPY_PROTOCOL - Py_MethodListEntry("__deepcopy__",mxTagTable_copy), - Py_MethodListEntry("__copy__",mxTagTable_copy), -#endif - {NULL,NULL} /* end of list */ -}; - -/* --- Internal functions ----------------------------------------------*/ - -#ifdef HAVE_UNICODE - -/* Same as mxTextTools_Join() for Unicode objects. 
*/ - -static -PyObject *mxTextTools_UnicodeJoin(PyObject *seq, - int start, - int stop, - PyObject *separator) -{ - PyObject *newstring = 0, *tempstr = 0; - int newstring_len,current_len = 0; - Py_UNICODE *p; - int i; - Py_UNICODE *sep; - int sep_len; - - if (separator) { - separator = PyUnicode_FromObject(separator); - if (separator == NULL) - goto onError; - sep = PyUnicode_AS_UNICODE(separator); - sep_len = PyUnicode_GET_SIZE(separator); - } - else { - sep = NULL; - sep_len = 0; - } - - /* Create an empty new string */ - newstring_len = (10 + sep_len) * (stop - start); - newstring = PyUnicode_FromUnicode(NULL, newstring_len); - if (newstring == NULL) - goto onError; - p = PyUnicode_AS_UNICODE(newstring); - - /* Join with separator */ - for (i = start; i < stop; i++) { - register PyObject *o; - Py_UNICODE *st; - int len_st; - - o = PySequence_GetItem(seq, i); - - if PyTuple_Check(o) { - /* Tuple entry: (string,l,r,[...]) */ - register int l,r; - - /* parse tuple */ - Py_Assert((PyTuple_GET_SIZE(o) >= 3) && - PyInt_Check(PyTuple_GET_ITEM(o,1)) && - PyInt_Check(PyTuple_GET_ITEM(o,2)), - PyExc_TypeError, - "tuples must be of the format (string,l,r[,...])"); - tempstr = PyUnicode_FromObject(PyTuple_GET_ITEM(o,0)); - if (tempstr == NULL) - goto onError; - st = PyUnicode_AS_UNICODE(tempstr); - len_st = PyUnicode_GET_SIZE(tempstr); - l = PyInt_AS_LONG(PyTuple_GET_ITEM(o,1)); - r = PyInt_AS_LONG(PyTuple_GET_ITEM(o,2)); - - /* compute slice */ - if (r > len_st) r = len_st; - else if (r < 0) { - r += len_st + 1; - if (r < 0) - r = 0; - } - if (l > len_st) l = len_st; - else if (l < 0) { - l += len_st + 1; - if (l < 0) - l = 0; - } - - /* empty ? */ - if (l > r) - continue; - len_st = r - l; - if (len_st == 0) - continue; - - /* get pointer right */ - st += l; - } - else { - /* Must be a string entry: take the whole string */ - tempstr = PyUnicode_FromObject(o); - if (tempstr == NULL) - goto onError; - st = PyUnicode_AS_UNICODE(tempstr); - len_st = PyUnicode_GET_SIZE(tempstr); - } - - Py_DECREF(o); - - /* Resize the new string if needed */ - while (current_len + len_st + sep_len >= newstring_len) { - newstring_len += newstring_len >> 1; - if (PyUnicode_Resize(&newstring, newstring_len)) - goto onError; - p = PyUnicode_AS_UNICODE(newstring) + current_len; - } - - /* Insert separator */ - if (i > 0 && sep_len > 0) { - Py_UNICODE_COPY(p, sep, sep_len); - p += sep_len; - current_len += sep_len; - } - - /* Copy snippet into new string */ - Py_UNICODE_COPY(p, st, len_st); - p += len_st; - current_len += len_st; - - Py_DECREF(tempstr); - tempstr = NULL; - } - - /* Resize new string to the actual length */ - if (PyUnicode_Resize(&newstring, current_len)) - goto onError; - - Py_XDECREF(separator); - return newstring; - - onError: - Py_XDECREF(newstring); - Py_XDECREF(separator); - Py_XDECREF(tempstr); - return NULL; -} - -#endif - -/* Enhanced string join: also excepts tuple (text, left, right,...) - entries which then cause text[left:right] to be used as string - snippet. - - separator may be NULL; in that case, "" is used as separator. 
- -*/ - -static -PyObject *mxTextTools_Join(PyObject *seq, - int start, - int stop, - PyObject *separator) -{ - PyObject *newstring = 0; - int newstring_len, current_len = 0; - char *p; - int i; - char *sep; - int sep_len; - - if (separator) { -#ifdef HAVE_UNICODE - if (PyUnicode_Check(separator)) - return mxTextTools_UnicodeJoin(seq, start, stop, separator); -#endif - Py_Assert(PyString_Check(separator), - PyExc_TypeError, - "separator must be a string"); - sep = PyString_AS_STRING(separator); - sep_len = PyString_GET_SIZE(separator); - } - else { - sep = NULL; - sep_len = 0; - } - - /* Create an empty new string */ - newstring_len = (10 + sep_len) * (stop - start); - newstring = PyString_FromStringAndSize((char*)NULL, newstring_len); - if (newstring == NULL) - goto onError; - p = PyString_AS_STRING(newstring); - - /* Join with separator */ - for (i = start; i < stop; i++) { - register PyObject *o; - char *st; - int len_st; - - o = PySequence_GetItem(seq, i); - - if PyTuple_Check(o) { - /* Tuple entry: (string,l,r,[...]) */ - register int l,r; - - /* parse tuple */ - Py_Assert((PyTuple_GET_SIZE(o) >= 3) && - PyInt_Check(PyTuple_GET_ITEM(o,1)) && - PyInt_Check(PyTuple_GET_ITEM(o,2)), - PyExc_TypeError, - "tuples must be of the format (string,int,int[,...])"); -#ifdef HAVE_UNICODE - if (PyUnicode_Check(PyTuple_GET_ITEM(o,0))) { - /* Redirect to Unicode implementation; all previous work - is lost. */ - Py_DECREF(o); - Py_DECREF(newstring); - return mxTextTools_UnicodeJoin(seq, start, stop, separator); - } -#endif - Py_Assert(PyString_Check(PyTuple_GET_ITEM(o,0)), - PyExc_TypeError, - "tuples must be of the format (string,int,int[,...])"); - st = PyString_AS_STRING(PyTuple_GET_ITEM(o,0)); - len_st = PyString_GET_SIZE(PyTuple_GET_ITEM(o,0)); - l = PyInt_AS_LONG(PyTuple_GET_ITEM(o,1)); - r = PyInt_AS_LONG(PyTuple_GET_ITEM(o,2)); - - /* compute slice */ - if (r > len_st) r = len_st; - else if (r < 0) { - r += len_st + 1; - if (r < 0) - r = 0; - } - if (l > len_st) l = len_st; - else if (l < 0) { - l += len_st + 1; - if (l < 0) - l = 0; - } - - /* empty ? */ - if (l > r) - continue; - len_st = r - l; - if (len_st == 0) - continue; - - /* get pointer right */ - st += l; - } - else if (PyString_Check(o)) { - /* String entry: take the whole string */ - st = PyString_AS_STRING(o); - len_st = PyString_GET_SIZE(o); - } -#ifdef HAVE_UNICODE - else if (PyUnicode_Check(o)) { - /* Redirect to Unicode implementation; all previous work - is lost. 
*/ - Py_DECREF(o); - Py_DECREF(newstring); - return mxTextTools_UnicodeJoin(seq, start, stop, separator); - } -#endif - else { - Py_DECREF(o); - Py_Error(PyExc_TypeError, - "list must contain tuples or strings as entries"); - } - - Py_DECREF(o); - - /* Resize the new string if needed */ - while (current_len + len_st + sep_len >= newstring_len) { - newstring_len += newstring_len >> 1; - if (_PyString_Resize(&newstring, newstring_len)) - goto onError; - p = PyString_AS_STRING(newstring) + current_len; - } - - /* Insert separator */ - if (i > 0 && sep_len > 0) { - memcpy(p, sep, sep_len); - p += sep_len; - current_len += sep_len; - } - - /* Copy snippet into new string */ - memcpy(p,st,len_st); - p += len_st; - current_len += len_st; - } - - /* Resize new string to the actual length */ - if (_PyString_Resize(&newstring, current_len)) - goto onError; - - return newstring; - - onError: - Py_XDECREF(newstring); - return NULL; -} - -static -PyObject *mxTextTools_HexStringFromString(char *str, - int len) -{ - PyObject *w = 0; - int i; - char *hex; - static const char hexdigits[] = "0123456789abcdef"; - - /* Convert to HEX */ - w = PyString_FromStringAndSize(NULL,2*len); - if (!w) - goto onError; - hex = PyString_AS_STRING(w); - for (i = 0; i < len; i ++) { - unsigned char c = (unsigned char)*str; - - *hex++ = hexdigits[c >> 4]; - *hex++ = hexdigits[c & 0x0F]; - str++; - } - return w; - - onError: - Py_XDECREF(w); - return NULL; -} - -static -PyObject *mxTextTools_StringFromHexString(char *hex, - int len) -{ - PyObject *w = 0; - int i; - char *str; - static const char hexdigits[] = "0123456789abcdef"; - - /* Convert to string */ - Py_Assert(len % 2 == 0, - PyExc_TypeError, - "need 2-digit hex string argument"); - len >>= 1; - w = PyString_FromStringAndSize(NULL,len); - if (!w) - goto onError; - str = PyString_AS_STRING(w); - for (i = 0; i < len; i++,str++) { - register char c; - register int j; - - c = tolower(*hex++); - for (j = 0; j < (int)sizeof(hexdigits); j++) - if (c == hexdigits[j]) { - *str = j << 4; - break; - } - if (j == sizeof(hexdigits)) { - DPRINTF("Failed: '%c' (%u) at %i\n",c,(unsigned int)c,i); - Py_Error(PyExc_ValueError, - "argument contains non-hex characters"); - } - - c = tolower(*hex++); - for (j = 0; j < (int)sizeof(hexdigits); j++) - if (c == hexdigits[j]) { - *str += j; - break; - } - if (j == sizeof(hexdigits)) { - DPRINTF("Failed2: '%c' (%u) at %i\n",c,(unsigned int)c,i); - Py_Error(PyExc_ValueError, - "argument contains non-hex characters"); - } - } - return w; - - onError: - Py_XDECREF(w); - return NULL; -} - -static -int mxTextTools_IsASCII(PyObject *text, - int left, - int right) -{ - if (PyString_Check(text)) { - int len; - register int i; - register unsigned char *str = (unsigned char *)PyString_AS_STRING(text); - - len = PyString_GET_SIZE(text); - Py_CheckSequenceSlice(len, left, right); - for (i = left; i < right; i++) - if (str[i] >= 128) - return 0; - return 1; - } - -#ifdef HAVE_UNICODE - else if (PyUnicode_Check(text)) { - int len; - register int i; - register Py_UNICODE *str = PyUnicode_AS_UNICODE(text); - - len = PyUnicode_GET_SIZE(text); - Py_CheckSequenceSlice(len, left, right); - for (i = left; i < right; i++) - if (str[i] >= 128) - return 0; - return 1; - } -#endif - - else - Py_Error(PyExc_TypeError, - "need string object"); - - onError: - return -1; -} - -/* Takes a list of tuples (replacement,l,r,...) and produces a taglist - suitable for mxTextTools_Join() which creates a copy of - text where every slice [l:r] is replaced by the given replacement. 
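Note: the two hex helpers and the ASCII check above back the module-level str2hex(), hex2str() and isascii() functions documented further down; str2hex() produces two lowercase hex digits per byte, hex2str() raises ValueError for odd-length or non-hex input, and isascii() returns 1/0. A quick sketch of the round trip, assuming these names are re-exported by the TextTools wrapper module:

    from simpleparse.stt.TextTools.TextTools import str2hex, hex2str, isascii

    encoded = str2hex('ABC')              # -> '414243'
    assert hex2str(encoded) == 'ABC'
    assert isascii('plain text') == 1
    assert isascii('caf\xe9') == 0        # any byte >= 128 -> not ASCII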
- -*/ - -static -PyObject *mxTextTools_Joinlist(PyObject *text, - PyObject *list, - int pos, - int text_len) -{ - PyObject *joinlist = 0; - int list_len; - int i; - int listitem = 0; - int listsize = INITIAL_LIST_SIZE; - - if (PyString_Check(text)) { - Py_CheckStringSlice(text, pos, text_len); - } -#ifdef HAVE_UNICODE - else if (PyUnicode_Check(text)) { - Py_CheckUnicodeSlice(text, pos, text_len); - } -#endif - else - Py_Error(PyExc_TypeError, - "expected string or unicode"); - - Py_Assert(PyList_Check(list), - PyExc_TypeError, - "expected a list of tuples as second argument"); - list_len = PyList_GET_SIZE(list); - - joinlist = PyList_New(listsize); - if (joinlist == NULL) - goto onError; - - for (i = 0; i < list_len; i++) { - register PyObject *t; - register int left, right; - - t = PyList_GET_ITEM(list, i); - Py_Assert(PyTuple_Check(t) && - (PyTuple_GET_SIZE(t) >= 3) && - (PyString_Check(PyTuple_GET_ITEM(t,0)) || - PyUnicode_Check(PyTuple_GET_ITEM(t,0))) && - PyInt_Check(PyTuple_GET_ITEM(t,1)) && - PyInt_Check(PyTuple_GET_ITEM(t,2)), - PyExc_TypeError, - "tuples must be of the form (string,int,int,...)"); - left = PyInt_AS_LONG(PyTuple_GET_ITEM(t,1)); - right = PyInt_AS_LONG(PyTuple_GET_ITEM(t,2)); - - Py_Assert(left >= pos, - PyExc_ValueError, - "list is not sorted ascending"); - - if (left > pos) { /* joinlist.append((text,pos,left)) */ - register PyObject *v; - register PyObject *w; - - v = PyTuple_New(3); - if (v == NULL) - goto onError; - - Py_INCREF(text); - PyTuple_SET_ITEM(v,0,text); - - w = PyInt_FromLong(pos); - if (w == NULL) - goto onError; - PyTuple_SET_ITEM(v,1,w); - - w = PyTuple_GET_ITEM(t,1); - Py_INCREF(w); - PyTuple_SET_ITEM(v,2,w); - - if (listitem < listsize) - PyList_SET_ITEM(joinlist,listitem,v); - else { - PyList_Append(joinlist,v); - Py_DECREF(v); - } - listitem++; - } - - /* joinlist.append(string) */ - if (listitem < listsize) { - register PyObject *v = PyTuple_GET_ITEM(t,0); - Py_INCREF(v); - PyList_SET_ITEM(joinlist,listitem,v); - } - else - PyList_Append(joinlist,PyTuple_GET_ITEM(t,0)); - listitem++; - - pos = right; - } - - if (pos < text_len) { /* joinlist.append((text,pos,text_len)) */ - register PyObject *v; - register PyObject *w; - - v = PyTuple_New(3); - if (v == NULL) - goto onError; - - Py_INCREF(text); - PyTuple_SET_ITEM(v,0,text); - - w = PyInt_FromLong(pos); - if (w == NULL) - goto onError; - PyTuple_SET_ITEM(v,1,w); - - w = PyInt_FromLong(text_len); - if (w == NULL) - goto onError; - PyTuple_SET_ITEM(v,2,w); - - if (listitem < listsize) - PyList_SET_ITEM(joinlist,listitem,v); - else { - PyList_Append(joinlist,v); - Py_DECREF(v); - } - listitem++; - } - - /* Resize list if necessary */ - if (listitem < listsize) - PyList_SetSlice(joinlist,listitem,listsize,(PyObject*)NULL); - - return joinlist; - - onError: - - Py_XDECREF(joinlist); - return NULL; -} - -#ifdef HAVE_UNICODE -static -PyObject *mxTextTools_UnicodeCharSplit(PyObject *text, - PyObject *separator, - int start, - int text_len) -{ - PyObject *list = NULL; - register int x; - int listitem = 0; - int listsize = INITIAL_LIST_SIZE; - Py_UNICODE *tx; - Py_UNICODE sep; - - text = PyUnicode_FromObject(text); - if (text == NULL) { - separator = NULL; - goto onError; - } - separator = PyUnicode_FromObject(separator); - if (separator == NULL) - goto onError; - - Py_CheckUnicodeSlice(text, start, text_len); - - Py_Assert(PyUnicode_GET_SIZE(separator) == 1, - PyExc_TypeError, - "separator must be a single character"); - - tx = PyUnicode_AS_UNICODE(text); - sep = *PyUnicode_AS_UNICODE(separator); - - 
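Note: mxTextTools_Joinlist() above is the worker behind the module-level joinlist() function: given the original text and an ascending, non-overlapping list of (replacement, l, r) tuples, it emits a join list mixing plain strings and (text, l, r) slice tuples, which join() (implemented earlier) concatenates into a copy of the text with each slice replaced. A small sketch of the round trip, assuming both names are exported as documented further down; note join()'s non-standard negative-index convention (-1 means one position after the last character):

    from simpleparse.stt.TextTools.TextTools import join, joinlist

    text = 'Hello World'
    jl = joinlist(text, [('TextTools', 6, 11)])    # replace the slice [6:11]
    assert join(jl) == 'Hello TextTools'

    # join() also accepts (string, l, r) tuples directly
    assert join([('Example', 0, -1)]) == 'Example'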
list = PyList_New(listsize); - if (!list) - goto onError; - - x = start; - while (1) { - PyObject *s; - register int z; - - /* Skip to next separator */ - z = x; - for (;x < text_len; x++) - if (tx[x] == sep) - break; - - /* Append the slice to list */ - s = PyUnicode_FromUnicode(&tx[z], x - z); - if (!s) - goto onError; - if (listitem < listsize) - PyList_SET_ITEM(list,listitem,s); - else { - PyList_Append(list,s); - Py_DECREF(s); - } - listitem++; - - if (x == text_len) - break; - - /* Skip separator */ - x++; - } - - /* Resize list if necessary */ - if (listitem < listsize) - PyList_SetSlice(list,listitem,listsize,(PyObject*)NULL); - - Py_DECREF(text); - Py_DECREF(separator); - return list; - - onError: - Py_XDECREF(list); - Py_XDECREF(text); - Py_XDECREF(separator); - return NULL; -} -#endif - -static -PyObject *mxTextTools_CharSplit(PyObject *text, - PyObject *separator, - int start, - int text_len) -{ - PyObject *list = 0; - register int x; - int listitem = 0; - int listsize = INITIAL_LIST_SIZE; - char *tx; - char sep; - -#ifdef HAVE_UNICODE - if (PyUnicode_Check(text) || PyUnicode_Check(separator)) - return mxTextTools_UnicodeCharSplit(text, separator, - start, text_len); -#endif - - if (PyString_Check(text) && PyString_Check(separator)) { - Py_CheckStringSlice(text, start, text_len); - } - else - Py_Error(PyExc_TypeError, - "text and separator must be strings or unicode"); - - Py_Assert(PyString_GET_SIZE(separator) == 1, - PyExc_TypeError, - "separator must be a single character"); - - tx = PyString_AS_STRING(text); - sep = *PyString_AS_STRING(separator); - - list = PyList_New(listsize); - if (!list) - goto onError; - - x = start; - while (1) { - PyObject *s; - register int z; - - /* Skip to next separator */ - z = x; - for (;x < text_len; x++) - if (tx[x] == sep) - break; - - /* Append the slice to list */ - s = PyString_FromStringAndSize(&tx[z], x - z); - if (!s) - goto onError; - if (listitem < listsize) - PyList_SET_ITEM(list,listitem,s); - else { - PyList_Append(list,s); - Py_DECREF(s); - } - listitem++; - - if (x == text_len) - break; - - /* Skip separator */ - x++; - } - - /* Resize list if necessary */ - if (listitem < listsize) - PyList_SetSlice(list,listitem,listsize,(PyObject*)NULL); - - return list; - - onError: - Py_XDECREF(list); - return NULL; -} - -#ifdef HAVE_UNICODE -static -PyObject *mxTextTools_UnicodeSplitAt(PyObject *text, - PyObject *separator, - int nth, - int start, - int text_len) -{ - PyObject *tuple = 0; - register int x; - PyObject *s; - Py_UNICODE *tx; - Py_UNICODE sep; - - text = PyUnicode_FromObject(text); - if (text == NULL) { - separator = NULL; - goto onError; - } - separator = PyUnicode_FromObject(separator); - if (separator == NULL) - goto onError; - - Py_CheckUnicodeSlice(text, start, text_len); - - Py_Assert(PyUnicode_GET_SIZE(separator) == 1, - PyExc_TypeError, - "separator must be a single character"); - - tx = PyUnicode_AS_UNICODE(text); - sep = *PyUnicode_AS_UNICODE(separator); - - tuple = PyTuple_New(2); - if (!tuple) - goto onError; - - if (nth > 0) { - /* Skip to nth separator from the left */ - x = start; - while (1) { - for (; x < text_len; x++) - if (tx[x] == sep) - break; - if (--nth == 0 || x == text_len) - break; - x++; - } - } - else if (nth < 0) { - /* Skip to nth separator from the right */ - x = text_len - 1; - while (1) { - for (; x >= start; x--) - if (tx[x] == sep) - break; - if (++nth == 0 || x < start) - break; - x--; - } - } - else - Py_Error(PyExc_ValueError, - "nth must be non-zero"); - - /* Add to tuple */ - if (x < 
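Note: mxTextTools_CharSplit() above backs the module-level charsplit() function: it splits on a single separator character and, unlike the set-based splitters further down, keeps empty fields. A quick sketch, assuming the exported name documented later in this file:

    from simpleparse.stt.TextTools.TextTools import charsplit

    assert charsplit('a,b,,c', ',') == ['a', 'b', '', 'c']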
start) - s = PyUnicode_FromUnicode((Py_UNICODE *)"", 0); - else - s = PyUnicode_FromUnicode(&tx[start], x - start); - if (!s) - goto onError; - PyTuple_SET_ITEM(tuple,0,s); - - /* Skip separator */ - x++; - - if (x >= text_len) - s = PyUnicode_FromUnicode((Py_UNICODE *)"", 0); - else - s = PyUnicode_FromUnicode(&tx[x], text_len - x); - if (!s) - goto onError; - PyTuple_SET_ITEM(tuple,1,s); - - Py_DECREF(text); - Py_DECREF(separator); - return tuple; - - onError: - Py_XDECREF(tuple); - Py_XDECREF(text); - Py_XDECREF(separator); - return NULL; -} -#endif - -static -PyObject *mxTextTools_SplitAt(PyObject *text, - PyObject *separator, - int nth, - int start, - int text_len) -{ - PyObject *tuple = 0; - register int x; - PyObject *s; - char *tx; - char sep; - -#ifdef HAVE_UNICODE - if (PyUnicode_Check(text) || PyUnicode_Check(separator)) - return mxTextTools_UnicodeSplitAt(text, separator, - nth, start, text_len); -#endif - - if (PyString_Check(text) && PyString_Check(separator)) { - Py_CheckStringSlice(text, start, text_len); - } - else - Py_Error(PyExc_TypeError, - "text and separator must be strings or unicode"); - - Py_Assert(PyString_GET_SIZE(separator) == 1, - PyExc_TypeError, - "separator must be a single character"); - - tx = PyString_AS_STRING(text); - sep = *PyString_AS_STRING(separator); - - tuple = PyTuple_New(2); - if (!tuple) - goto onError; - - if (nth > 0) { - /* Skip to nth separator from the left */ - x = start; - while (1) { - for (; x < text_len; x++) - if (tx[x] == sep) - break; - if (--nth == 0 || x == text_len) - break; - x++; - } - } - else if (nth < 0) { - /* Skip to nth separator from the right */ - x = text_len - 1; - while (1) { - for (; x >= start; x--) - if (tx[x] == sep) - break; - if (++nth == 0 || x < start) - break; - x--; - } - } - else - Py_Error(PyExc_ValueError, - "nth must be non-zero"); - - /* Add to tuple */ - if (x < start) - s = PyString_FromStringAndSize("",0); - else - s = PyString_FromStringAndSize(&tx[start], x - start); - if (!s) - goto onError; - PyTuple_SET_ITEM(tuple,0,s); - - /* Skip separator */ - x++; - - if (x >= text_len) - s = PyString_FromStringAndSize("",0); - else - s = PyString_FromStringAndSize(&tx[x], text_len - x); - if (!s) - goto onError; - PyTuple_SET_ITEM(tuple,1,s); - - return tuple; - - onError: - Py_XDECREF(tuple); - return NULL; -} - -#ifdef HAVE_UNICODE -static -PyObject *mxTextTools_UnicodeSuffix(PyObject *text, - PyObject *suffixes, - int start, - int text_len, - PyObject *translate) -{ - int i; - Py_UNICODE *tx; - - text = PyUnicode_FromObject(text); - if (text == NULL) - goto onError; - - if (PyUnicode_Check(text)) { - Py_CheckUnicodeSlice(text, start, text_len); - } - else - Py_Error(PyExc_TypeError, - "expected unicode"); - Py_Assert(PyTuple_Check(suffixes), - PyExc_TypeError, - "suffixes needs to be a tuple of unicode strings"); - - /* XXX Add support for translate... 
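Note: mxTextTools_SplitAt() above backs the module-level splitat() function: it splits the text into a 2-tuple at the nth occurrence of a single character, where a negative nth counts occurrences from the right and nth == 0 raises ValueError; if the character is not found, one half of the tuple is empty as described in the docstring further down. A quick sketch, assuming the exported name:

    from simpleparse.stt.TextTools.TextTools import splitat

    assert splitat('a=b=c', '=') == ('a', 'b=c')        # split at the first '='
    assert splitat('a=b=c', '=', 2) == ('a=b', 'c')     # ... at the second one
    assert splitat('a=b=c', '=', -1) == ('a=b', 'c')    # counted from the right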
*/ - Py_Assert(translate == NULL, - PyExc_TypeError, - "translate is not supported for Unicode suffix()es"); - - tx = PyUnicode_AS_UNICODE(text); - - for (i = 0; i < PyTuple_GET_SIZE(suffixes); i++) { - PyObject *suffix = PyTuple_GET_ITEM(suffixes,i); - int start_cmp; - - suffix = PyUnicode_FromObject(suffix); - if (suffix == NULL) - goto onError; - - start_cmp = text_len - PyUnicode_GET_SIZE(suffix); - if (start_cmp >= start && - PyUnicode_AS_UNICODE(suffix)[0] == tx[start_cmp] && - memcmp(PyUnicode_AS_UNICODE(suffix), - &tx[start_cmp], - PyUnicode_GET_DATA_SIZE(suffix)) == 0) { - Py_DECREF(text); - return suffix; - } - - Py_DECREF(suffix); - } - - Py_DECREF(text); - Py_ReturnNone(); - - onError: - Py_XDECREF(text); - return NULL; -} -#endif - -static -PyObject *mxTextTools_Suffix(PyObject *text, - PyObject *suffixes, - int start, - int text_len, - PyObject *translate) -{ - int i; - char *tx; - -#ifdef HAVE_UNICODE - if (PyUnicode_Check(text)) - return mxTextTools_UnicodeSuffix(text, suffixes, - start, text_len, - translate); -#endif - - if (PyString_Check(text)) { - Py_CheckStringSlice(text, start, text_len); - } - else - Py_Error(PyExc_TypeError, - "expected string or unicode"); - Py_Assert(PyTuple_Check(suffixes), - PyExc_TypeError, - "suffixes needs to be a tuple of strings"); - tx = PyString_AS_STRING(text); - - if (translate) { - char *tr; - - Py_Assert(PyString_Check(translate) && - PyString_GET_SIZE(translate) == 256, - PyExc_TypeError, - "translate must be a string having 256 characters"); - tr = PyString_AS_STRING(translate); - - for (i = 0; i < PyTuple_GET_SIZE(suffixes); i++) { - PyObject *suffix = PyTuple_GET_ITEM(suffixes, i); - int start_cmp; - register char *s; - register char *t; - register int j; - - Py_AssertWithArg(PyString_Check(suffix), - PyExc_TypeError, - "tuple entry %i is not a string",i); - start_cmp = text_len - PyString_GET_SIZE(suffix); - if (start_cmp < start) - continue; - - /* Do the compare using a translate table */ - s = PyString_AS_STRING(suffix); - t = tx + start_cmp; - for (j = start_cmp; j < text_len; j++, s++, t++) - if (*s != tr[(unsigned char)*t]) - break; - if (j == text_len) { - Py_INCREF(suffix); - return suffix; - } - } - } - - else - for (i = 0; i < PyTuple_GET_SIZE(suffixes); i++) { - PyObject *suffix = PyTuple_GET_ITEM(suffixes,i); - int start_cmp; - - Py_AssertWithArg(PyString_Check(suffix), - PyExc_TypeError, - "tuple entry %i is not a string",i); - start_cmp = text_len - PyString_GET_SIZE(suffix); - if (start_cmp < start) - continue; - - /* Compare without translate table */ - if (PyString_AS_STRING(suffix)[0] == tx[start_cmp] - && - strncmp(PyString_AS_STRING(suffix), - &tx[start_cmp], - PyString_GET_SIZE(suffix)) == 0) { - Py_INCREF(suffix); - return suffix; - } - } - - Py_ReturnNone(); - - onError: - return NULL; -} - -#ifdef HAVE_UNICODE -static -PyObject *mxTextTools_UnicodePrefix(PyObject *text, - PyObject *prefixes, - int start, - int text_len, - PyObject *translate) -{ - int i; - Py_UNICODE *tx; - - text = PyUnicode_FromObject(text); - if (text == NULL) - goto onError; - - if (PyUnicode_Check(text)) { - Py_CheckUnicodeSlice(text, start, text_len); - } - else - Py_Error(PyExc_TypeError, - "expected unicode"); - Py_Assert(PyTuple_Check(prefixes), - PyExc_TypeError, - "prefixes needs to be a tuple of unicode strings"); - - /* XXX Add support for translate... 
*/ - Py_Assert(translate == NULL, - PyExc_TypeError, - "translate is not supported for Unicode prefix()es"); - - tx = PyUnicode_AS_UNICODE(text); - - for (i = 0; i < PyTuple_GET_SIZE(prefixes); i++) { - PyObject *prefix = PyTuple_GET_ITEM(prefixes,i); - - prefix = PyUnicode_FromObject(prefix); - if (prefix == NULL) - goto onError; - - /* Compare without translate table */ - if (start + PyString_GET_SIZE(prefix) <= text_len && - PyUnicode_AS_UNICODE(prefix)[0] == tx[start] && - memcmp(PyUnicode_AS_UNICODE(prefix), - &tx[start], - PyUnicode_GET_DATA_SIZE(prefix)) == 0) { - Py_INCREF(prefix); - return prefix; - } - - Py_DECREF(prefix); - } - - Py_DECREF(text); - Py_ReturnNone(); - - onError: - Py_XDECREF(text); - return NULL; -} -#endif - -static -PyObject *mxTextTools_Prefix(PyObject *text, - PyObject *prefixes, - int start, - int text_len, - PyObject *translate) -{ - int i; - char *tx; - -#ifdef HAVE_UNICODE - if (PyUnicode_Check(text)) - return mxTextTools_UnicodePrefix(text, prefixes, - start, text_len, - translate); -#endif - - if (PyString_Check(text)) { - Py_CheckStringSlice(text, start, text_len); - } - else - Py_Error(PyExc_TypeError, - "expected string or unicode"); - Py_Assert(PyTuple_Check(prefixes), - PyExc_TypeError, - "prefixes needs to be a tuple of strings"); - tx = PyString_AS_STRING(text); - - if (translate) { - char *tr; - - Py_Assert(PyString_Check(translate) && - PyString_GET_SIZE(translate) == 256, - PyExc_TypeError, - "translate must be a string having 256 characters"); - tr = PyString_AS_STRING(translate); - - for (i = 0; i < PyTuple_GET_SIZE(prefixes); i++) { - PyObject *prefix = PyTuple_GET_ITEM(prefixes,i); - int cmp_len; - register char *s; - register char *t; - register int j; - - Py_AssertWithArg(PyString_Check(prefix), - PyExc_TypeError, - "tuple entry %i is not a string",i); - cmp_len = PyString_GET_SIZE(prefix); - if (start + cmp_len > text_len) - continue; - - /* Do the compare using a translate table */ - s = PyString_AS_STRING(prefix); - t = tx + start; - for (j = 0; j < cmp_len; j++, s++, t++) - if (*s != tr[(unsigned char)*t]) - break; - if (j == cmp_len) { - Py_INCREF(prefix); - return prefix; - } - } - } - - else - for (i = 0; i < PyTuple_GET_SIZE(prefixes); i++) { - PyObject *prefix = PyTuple_GET_ITEM(prefixes,i); - - Py_AssertWithArg(PyString_Check(prefix), - PyExc_TypeError, - "tuple entry %i is not a string",i); - if (start + PyString_GET_SIZE(prefix) > text_len) - continue; - - /* Compare without translate table */ - if (PyString_AS_STRING(prefix)[0] == tx[start] && - strncmp(PyString_AS_STRING(prefix), - &tx[start], - PyString_GET_SIZE(prefix)) == 0) { - Py_INCREF(prefix); - return prefix; - } - } - - Py_ReturnNone(); - - onError: - return NULL; -} - -/* Stips off characters appearing in the character set from text[start:stop] - and returns the result as Python string object. 
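Note: the Prefix/Suffix workers above return the first matching entry from a tuple of candidate strings, or None when nothing matches; the optional translate argument is a 256-character table applied to the text (not the candidates) before comparing, and it is only supported for plain strings. A small sketch, assuming prefix(), suffix() and the to_lower translate string installed by the module init code below are all re-exported by the wrapper module; note that translate is positional, after start and stop:

    from simpleparse.stt.TextTools.TextTools import prefix, suffix, to_lower

    assert suffix('archive.tar.gz', ('.zip', '.gz')) == '.gz'
    assert suffix('notes.txt', ('.zip', '.gz')) is None

    text = 'Content-Type: text/plain'
    # case-insensitive prefix check by lower-casing the text before comparison
    assert prefix(text, ('content-type:', 'status:'), 0, len(text), to_lower) == 'content-type:'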
- - where indicates the mode: - where < 0: strip left only - where = 0: strip left and right - where > 0: strip right only - -*/ -static -PyObject *mxTextTools_SetStrip(char *tx, - int tx_len, - char *setstr, - int setstr_len, - int start, - int stop, - int where) -{ - int left, right; - - Py_Assert(setstr_len == 32, - PyExc_TypeError, - "separator needs to be a set as obtained from set()"); - Py_CheckBufferSlice(tx_len, start, stop); - - /* Strip left */ - if (where <= 0) { - register int x; - for (x = start; x < stop; x++) - if (!Py_CharInSet(tx[x], setstr)) - break; - left = x; - } - else - left = start; - - /* Strip right */ - if (where >= 0) { - register int x; - for (x = stop - 1; x >= start; x--) - if (!Py_CharInSet(tx[x], setstr)) - break; - right = x + 1; - } - else - right = stop; - - return PyString_FromStringAndSize(tx + left, max(right - left, 0)); - - onError: - return NULL; -} - -static -PyObject *mxTextTools_SetSplit(char *tx, - int tx_len, - char *setstr, - int setstr_len, - int start, - int text_len) -{ - PyObject *list = NULL; - register int x; - int listitem = 0; - int listsize = INITIAL_LIST_SIZE; - - Py_Assert(setstr_len == 32, - PyExc_TypeError, - "separator needs to be a set as obtained from set()"); - Py_CheckBufferSlice(tx_len,start,text_len); - - list = PyList_New(listsize); - if (!list) - goto onError; - - x = start; - while (x < text_len) { - int z; - - /* Skip all text in set */ - for (;x < text_len; x++) { - register unsigned int c = (unsigned char)tx[x]; - register unsigned int block = (unsigned char)setstr[c >> 3]; - if (!block || ((block & (1 << (c & 7))) == 0)) - break; - } - - /* Skip all text not in set */ - z = x; - for (;x < text_len; x++) { - register unsigned int c = (unsigned char)tx[x]; - register unsigned int block = (unsigned char)setstr[c >> 3]; - if (block && ((block & (1 << (c & 7))) != 0)) - break; - } - - /* Append the slice to list if it is not empty */ - if (x > z) { - PyObject *s; - s = PyString_FromStringAndSize((char *)&tx[z], x - z); - if (!s) - goto onError; - if (listitem < listsize) - PyList_SET_ITEM(list,listitem,s); - else { - PyList_Append(list,s); - Py_DECREF(s); - } - listitem++; - } - } - - /* Resize list if necessary */ - if (listitem < listsize) - PyList_SetSlice(list,listitem,listsize,(PyObject*)NULL); - - return list; - - onError: - Py_XDECREF(list); - return NULL; -} - -static -PyObject *mxTextTools_SetSplitX(char *tx, - int tx_len, - char *setstr, - int setstr_len, - int start, - int text_len) -{ - PyObject *list = NULL; - register int x; - int listitem = 0; - int listsize = INITIAL_LIST_SIZE; - - Py_Assert(setstr_len == 32, - PyExc_TypeError, - "separator needs to be a set as obtained from set()"); - Py_CheckBufferSlice(tx_len,start,text_len); - - list = PyList_New(listsize); - if (!list) - goto onError; - - x = start; - while (x < text_len) { - PyObject *s; - register int z; - - /* Skip all text not in set */ - z = x; - for (;x < text_len; x++) { - register unsigned int c = (unsigned char)tx[x]; - register unsigned int block = (unsigned char)setstr[c >> 3]; - if (block && ((block & (1 << (c & 7))) != 0)) - break; - } - - /* Append the slice to list */ - s = PyString_FromStringAndSize((char *)&tx[z], x - z); - if (!s) - goto onError; - if (listitem < listsize) - PyList_SET_ITEM(list,listitem,s); - else { - PyList_Append(list,s); - Py_DECREF(s); - } - listitem++; - - if (x >= text_len) - break; - - /* Skip all text in set */ - z = x; - for (;x < text_len; x++) { - register unsigned int c = (unsigned char)tx[x]; - 
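Note: the set-based helpers above all operate on the 32-byte bit string produced by the module-level set() function: setsplit() drops the separator runs and empty fields, setstrip()'s mode argument selects left/both/right stripping, and setsplitx() keeps the separator runs as every second list entry. The docstrings further down mark these as deprecated in favour of CharSet objects, but a quick sketch of the classic usage, assuming the exported names:

    from simpleparse.stt.TextTools.TextTools import set, setsplit, setstrip
    # note: this set() is TextTools' bit-set builder and shadows the Python builtin

    whitespace = set(' \t\r\n')                      # 32-byte bit set, one bit per character
    assert setsplit('  spam   eggs ', whitespace) == ['spam', 'eggs']
    assert setstrip('  spam  ', whitespace) == 'spam'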
register unsigned int block = (unsigned char)setstr[c >> 3]; - if (!block || ((block & (1 << (c & 7))) == 0)) - break; - } - - /* Append the slice to list if it is not empty */ - s = PyString_FromStringAndSize((char *)&tx[z], x - z); - if (!s) - goto onError; - if (listitem < listsize) - PyList_SET_ITEM(list,listitem,s); - else { - PyList_Append(list,s); - Py_DECREF(s); - } - listitem++; - } - - /* Resize list if necessary */ - if (listitem < listsize) - PyList_SetSlice(list,listitem,listsize,(PyObject*)NULL); - - return list; - - onError: - Py_XDECREF(list); - return NULL; -} - -static -PyObject *mxTextTools_Upper(PyObject *text) -{ - PyObject *ntext; - register unsigned char *s; - register unsigned char *orig; - register int i; - unsigned char *tr; - int len; - - Py_Assert(PyString_Check(text), - PyExc_TypeError, - "expected a Python string"); - - len = PyString_GET_SIZE(text); - ntext = PyString_FromStringAndSize(NULL,len); - if (!ntext) - goto onError; - - /* Translate */ - tr = (unsigned char *)PyString_AS_STRING(mx_ToUpper); - orig = (unsigned char *)PyString_AS_STRING(text); - s = (unsigned char *)PyString_AS_STRING(ntext); - for (i = 0; i < len; i++, s++, orig++) - *s = tr[*orig]; - - return ntext; - - onError: - return NULL; -} - -#ifdef HAVE_UNICODE -static -PyObject *mxTextTools_UnicodeUpper(PyObject *text) -{ - PyObject *ntext; - register Py_UNICODE *s; - register Py_UNICODE *orig; - register int i; - int len; - - text = PyUnicode_FromObject(text); - if (text == NULL) - goto onError; - - len = PyUnicode_GET_SIZE(text); - ntext = PyUnicode_FromUnicode(NULL, len); - if (!ntext) - goto onError; - - /* Translate */ - orig = (Py_UNICODE *)PyUnicode_AS_UNICODE(text); - s = (Py_UNICODE *)PyUnicode_AS_UNICODE(ntext); - for (i = 0; i < len; i++, s++, orig++) - *s = Py_UNICODE_TOUPPER(*orig); - - Py_DECREF(text); - return ntext; - - onError: - Py_XDECREF(text); - return NULL; -} -#endif - -static -PyObject *mxTextTools_Lower(PyObject *text) -{ - PyObject *ntext; - register unsigned char *s; - register unsigned char *orig; - register int i; - unsigned char *tr; - int len; - - Py_Assert(PyString_Check(text), - PyExc_TypeError, - "expected a Python string"); - - len = PyString_GET_SIZE(text); - ntext = PyString_FromStringAndSize(NULL,len); - if (!ntext) - goto onError; - - /* Translate */ - tr = (unsigned char *)PyString_AS_STRING(mx_ToLower); - orig = (unsigned char *)PyString_AS_STRING(text); - s = (unsigned char *)PyString_AS_STRING(ntext); - for (i = 0; i < len; i++, s++, orig++) - *s = tr[*orig]; - - return ntext; - - onError: - return NULL; -} - -#ifdef HAVE_UNICODE -static -PyObject *mxTextTools_UnicodeLower(PyObject *text) -{ - PyObject *ntext; - register Py_UNICODE *s; - register Py_UNICODE *orig; - register int i; - int len; - - text = PyUnicode_FromObject(text); - if (text == NULL) - goto onError; - - len = PyUnicode_GET_SIZE(text); - ntext = PyUnicode_FromUnicode(NULL, len); - if (!ntext) - goto onError; - - /* Translate */ - orig = (Py_UNICODE *)PyUnicode_AS_UNICODE(text); - s = (Py_UNICODE *)PyUnicode_AS_UNICODE(ntext); - for (i = 0; i < len; i++, s++, orig++) - *s = Py_UNICODE_TOLOWER(*orig); - - Py_DECREF(text); - return ntext; - - onError: - Py_XDECREF(text); - return NULL; -} -#endif - -/* --- Module functions ------------------------------------------------*/ - -/* Interface to the tagging engine in mxte.c */ - -Py_C_Function_WithKeywords( - mxTextTools_tag, - "tag(text,tagtable,sliceleft=0,sliceright=len(text),taglist=[],context=None) \n""" - "Produce a tag list for a 
string, given a tag-table\n" - "- returns a tuple (success, taglist, nextindex)\n" - "- if taglist == None, then no taglist is created" - ) -{ - PyObject *text; - PyObject *tagtable; - int sliceright = INT_MAX; - int sliceleft = 0; - PyObject *taglist = 0; - int taglist_len; - PyObject *context = 0; - int next, result; - PyObject *res; - - Py_KeywordsGet6Args("OO|iiOO:tag", - text,tagtable,sliceleft,sliceright,taglist,context); - - if (taglist == NULL) { - /* not given, so use default: an empty list */ - taglist = PyList_New(0); - if (taglist == NULL) - goto onError; - taglist_len = 0; - } - else { - Py_INCREF(taglist); - Py_Assert(PyList_Check(taglist) || taglist == Py_None, - PyExc_TypeError, - "taglist must be a list or None"); - if (taglist != Py_None) { - taglist_len = PyList_Size(taglist); - if (taglist_len < 0) - goto onError; - } - else - taglist_len = 0; - } - - Py_Assert(mxTagTable_Check(tagtable) || - PyTuple_Check(tagtable) || - PyList_Check(tagtable), - PyExc_TypeError, - "tagtable must be a TagTable instance, list or tuple"); - - /* Prepare the argument for the Tagging Engine and let it process - the request */ - if (PyString_Check(text)) { - - Py_CheckStringSlice(text, sliceleft, sliceright); - - if (!mxTagTable_Check(tagtable)) { - tagtable = mxTagTable_New(tagtable, MXTAGTABLE_STRINGTYPE, 1); - if (tagtable == NULL) - goto onError; - } - else if (mxTagTable_Type(tagtable) != MXTAGTABLE_STRINGTYPE) { - Py_Error(PyExc_TypeError, - "TagTable instance is not intended for parsing strings"); - } - else - Py_INCREF(tagtable); - - /* Call the Tagging Engine */ - result = mxTextTools_TaggingEngine(text, - sliceleft, - sliceright, - (mxTagTableObject *)tagtable, - taglist, - context, - &next); - Py_DECREF(tagtable); - - } -#ifdef HAVE_UNICODE - else if (PyUnicode_Check(text)) { - - Py_CheckUnicodeSlice(text, sliceleft, sliceright); - - if (!mxTagTable_Check(tagtable)) { - tagtable = mxTagTable_New(tagtable, 1, 1); - if (tagtable == NULL) - goto onError; - } - else if (mxTagTable_Type(tagtable) != MXTAGTABLE_UNICODETYPE) { - Py_Error(PyExc_TypeError, - "TagTable instance is not intended for parsing Unicode"); - } - else - Py_INCREF(tagtable); - - /* Call the Tagging Engine */ - result = mxTextTools_UnicodeTaggingEngine(text, - sliceleft, - sliceright, - (mxTagTableObject *)tagtable, - taglist, - context, - &next); - Py_DECREF(tagtable); - - } -#endif - else - Py_Error(PyExc_TypeError, - "text must be a string or unicode"); - - /* Check for exceptions during matching */ - if (result == 0) - goto onError; - - /* Undo changes to taglist in case of a match failure (result == 1) */ - if (result == 1 && taglist != Py_None) { - DPRINTF(" undoing changes: del taglist[%i:%i]\n", - taglist_len, PyList_Size(taglist)); - if (PyList_SetSlice(taglist, - taglist_len, - PyList_Size(taglist), - NULL)) - goto onError; - } - - /* Convert result to the documented external values: - 0 - no match, 1 - match. 
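Note: the tag() wrapper above is the main entry point: it auto-compiles tuple/list table definitions (with caching), checks that a precompiled TagTable matches the string/Unicode type of the text, returns a (success, taglist, nextindex) tuple, builds no taglist when None is passed, and rolls back any taglist entries added during a failed match. A minimal usage sketch, assuming the names are re-exported by the TextTools wrapper module:

    from simpleparse.stt.TextTools.TextTools import *

    table = (('digits', AllIn, '0123456789'),)

    success, taglist, next_pos = tag('123abc', table)
    # success == 1, next_pos == 3; taglist holds ('digits', 0, 3, ...) style entries

    # Pass None as the taglist when only success/next position are of interest
    success, dummy, next_pos = tag('123abc', table, 0, 6, None)
    # dummy is None; no taglist is built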
*/ - result--; - - /* Build result tuple */ - res = PyTuple_New(3); - if (!res) - goto onError; - PyTuple_SET_ITEM(res,0,PyInt_FromLong(result)); - PyTuple_SET_ITEM(res,1,taglist); - PyTuple_SET_ITEM(res,2,PyInt_FromLong(next)); - return res; - - onError: - if (!PyErr_Occurred()) - Py_Error(PyExc_SystemError, - "NULL result without error in builtin tag()"); - Py_XDECREF(taglist); - return NULL; -} - -/* An extended version of string.join() for taglists: */ - -Py_C_Function( mxTextTools_join, - "join(joinlist,sep='',start=0,stop=len(joinlist))\n\n" - "Copy snippets from different strings together producing a\n" - "new string\n" - "The first argument must be a list of tuples or strings;\n" - "tuples must be of the form (string,l,r[,...]) and turn out\n" - "as string[l:r]\n" - "NOTE: the syntax used for negative slices is different\n" - "than the Python standard: -1 corresponds to the first\n" - "character *after* the string, e.g. ('Example',0,-1) gives\n" - "'Example' and not 'Exampl', like in Python\n" - "sep is an optional separator string, start and stop\n" - "define the slice of joinlist that is taken into accont." - ) -{ - PyObject *joinlist = NULL; - int joinlist_len; - PyObject *separator = NULL; - int start=0, stop=INT_MAX; - - Py_Get4Args("O|Oii:join", - joinlist,separator,start,stop); - - Py_Assert(PySequence_Check(joinlist), - PyExc_TypeError, - "first argument needs to be a sequence"); - - joinlist_len = PySequence_Length(joinlist); - Py_Assert(joinlist_len >= 0, - PyExc_TypeError, - "first argument needs to have a __len__ method"); - - Py_CheckSequenceSlice(joinlist_len, start, stop); - - /* Short-cut */ - if ((stop - start) <= 0) - return PyString_FromString(""); - - return mxTextTools_Join(joinlist, - start, stop, - separator); - - onError: - return NULL; -} - -/* - Special compare function for taglist-tuples, comparing - the text-slices given: - - slices starting at a smaller index come first - - for slices starting at the same index, the longer one - wins -*/ - -Py_C_Function( mxTextTools_cmp, - "cmp(a,b)\n\n" - "Compare two valid taglist tuples w/r to their slice\n" - "position; this is useful for sorting joinlists.") -{ - PyObject *v,*w; - int cmp; - - Py_Get2Args("OO:cmp",v,w); - - Py_Assert(PyTuple_Check(v) && PyTuple_Check(w) && - PyTuple_GET_SIZE(v) >= 3 && PyTuple_GET_SIZE(w) >= 3, - PyExc_TypeError, - "invalid taglist-tuple"); - - cmp = PyObject_Compare(PyTuple_GET_ITEM(v,1),PyTuple_GET_ITEM(w,1)); - if (cmp != 0) - return PyInt_FromLong(cmp); - cmp = - PyObject_Compare(PyTuple_GET_ITEM(v,2),PyTuple_GET_ITEM(w,2)); - return PyInt_FromLong(cmp); - - onError: - return NULL; -} - -Py_C_Function( mxTextTools_joinlist, - "joinlist(text,list,start=0,stop=len(text))\n\n" - "Takes a list of tuples (replacement,l,r,...) and produces\n" - "a taglist suitable for join() which creates a copy\n" - "of text where every slice [l:r] is replaced by the\n" - "given replacement\n" - "- the list must be sorted using cmp() as compare function\n" - "- it may not contain overlapping slices\n" - "- the slices may not contain negative indices\n" - "- if the taglist cannot contain overlapping slices, you can\n" - " give this function the taglist produced by tag() directly\n" - " (sorting is not needed, as the list will already be sorted)\n" - "- start and stop set the slice to work in, i.e. 
text[start:stop]" -) -{ - PyObject *list; - PyObject *text; - int text_len = INT_MAX; - int pos = 0; - - Py_Get4Args("OO|ii:joinlist",text,list,pos,text_len); - - return mxTextTools_Joinlist(text, list, pos, text_len); - - onError: - return NULL; -} - -Py_C_Function( mxTextTools_charsplit, - "charsplit(text,char,start=0,stop=len(text))\n\n" - "Split text[start:stop] into substrings at char and\n" - "return the result as list of strings." -) -{ - PyObject *text, *separator; - int text_len = INT_MAX; - int start = 0; - - Py_Get4Args("OO|ii:charsplit", - text,separator,start,text_len); - - return mxTextTools_CharSplit(text, separator, - start, text_len); - - onError: - return NULL; -} - -Py_C_Function( mxTextTools_splitat, - "splitat(text,char,nth=1,start=0,stop=len(text))\n\n" - "Split text[start:stop] into two substrings at the nth\n" - "occurance of char and return the result as 2-tuple. If the\n" - "character is not found, the second string is empty. nth may\n" - "be negative: the search is then done from the right and the\n" - "first string is empty in case the character is not found." -) -{ - PyObject *text, *separator; - int text_len = INT_MAX; - int start = 0; - int nth = 1; - - Py_Get5Args("OO|iii:splitat", - text,separator,nth,start,text_len); - - return mxTextTools_SplitAt(text, separator, - nth, start, text_len); - onError: - return NULL; -} - -Py_C_Function( mxTextTools_suffix, - "suffix(text,suffixes,start=0,stop=len(text)[,translate])\n\n" - "Looks at text[start:stop] and returns the first matching\n" - "suffix out of the tuple of strings given in suffixes.\n" - "If no suffix is found to be matching, None is returned.\n" - "The optional 256 char translate string is used to translate\n" - "the text prior to comparing it with the given suffixes." - ) -{ - PyObject *text, *suffixes, *translate = NULL; - int text_len = INT_MAX; - int start = 0; - - Py_Get5Args("OO|iiO:suffix", - text,suffixes,start,text_len,translate); - - return mxTextTools_Suffix(text, - suffixes, - start, text_len, - translate); - onError: - return NULL; -} - -Py_C_Function( mxTextTools_prefix, - "prefix(text,prefixes,start=0,stop=len(text)[,translate])\n\n" - "Looks at text[start:stop] and returns the first matching\n" - "prefix out of the tuple of strings given in prefixes.\n" - "If no prefix is found to be matching, None is returned.\n" - "The optional 256 char translate string is used to translate\n" - "the text prior to comparing it with the given suffixes." 
-) -{ - PyObject *text, *prefixes, *translate = NULL; - int text_len = INT_MAX; - int start = 0; - - Py_Get5Args("OO|iiO:prefix", - text,prefixes,start,text_len,translate); - - return mxTextTools_Prefix(text, - prefixes, - start, text_len, - translate); - onError: - return NULL; -} - -Py_C_Function( mxTextTools_set, - "set(string,logic=1)\n\n" - "Returns a character set for string: a bit encoded version\n" - "of the characters occurring in string.\n" - "- logic can be set to 0 if all characters *not* in string\n" - " should go into the set") -{ - PyObject *sto; - char *s,*st; - int len_s; - int logic = 1; - int i; - - Py_Get3Args("s#|i:set", - s,len_s,logic); - - sto = PyString_FromStringAndSize(NULL,32); - if (sto == NULL) - goto onError; - - st = PyString_AS_STRING(sto); - - if (logic) { - memset(st,0x00,32); - for (i = 0; i < len_s; i++,s++) { - int j = (unsigned char)*s; - - st[j >> 3] |= 1 << (j & 7); - } - } - else { - memset(st,0xFF,32); - for (i = 0; i < len_s; i++,s++) { - int j = (unsigned char)*s; - - st[j >> 3] &= ~(1 << (j & 7)); - } - } - return sto; - - onError: - return NULL; -} - -Py_C_Function( mxTextTools_setfind, - "setfind(text,set,start=0,stop=len(text))\n\n" - "Find the first occurence of any character from set in\n" - "text[start:stop]\n set must be a string obtained with set()\n" - "DEPRECATED: use CharSet().search() instead." -) -{ - PyObject *text; - PyObject *set; - int text_len = INT_MAX; - int start = 0; - register int x; - register char *tx; - register unsigned char *setstr; - - Py_Get4Args("OO|ii:setfind",text,set,start,text_len); - - Py_Assert(PyString_Check(text), - PyExc_TypeError, - "first argument needs to be a string"); - Py_Assert(PyString_Check(set) && PyString_GET_SIZE(set) == 32, - PyExc_TypeError, - "second argument needs to be a set"); - Py_CheckStringSlice(text,start,text_len); - - x = start; - tx = PyString_AS_STRING(text) + x; - setstr = (unsigned char *)PyString_AS_STRING(set); - - for (;x < text_len; tx++, x++) - if (Py_CharInSet(*tx,setstr)) - break; - - if (x == text_len) - /* Not found */ - return PyInt_FromLong(-1L); - else - return PyInt_FromLong(x); - - onError: - return NULL; -} - -Py_C_Function( mxTextTools_setstrip, - "setstrip(text,set,start=0,stop=len(text),mode=0)\n\n" - "Strip all characters in text[start:stop] appearing in set.\n" - "mode indicates where to strip (<0: left; =0: left and right;\n" - ">0: right). set must be a string obtained with set()\n" - "DEPRECATED: use CharSet().strip() instead." - ) -{ - char *tx; - int tx_len; - char *setstr; - int setstr_len; - int start = 0; - int stop = INT_MAX; - int mode = 0; - - Py_Get7Args("s#s#|iii:setstip", - tx,tx_len,setstr,setstr_len,start,stop,mode); - - return mxTextTools_SetStrip(tx, tx_len, - setstr, setstr_len, - start, stop, - mode); - - onError: - return NULL; -} - -Py_C_Function( mxTextTools_setsplit, - "setsplit(text,set,start=0,stop=len(text))\n\n" - "Split text[start:stop] into substrings using set,\n" - "omitting the splitting parts and empty substrings.\n" - "set must be a string obtained from set()\n" - "DEPRECATED: use CharSet().split() instead." 
- ) -{ - char *tx; - int tx_len; - char *setstr; - int setstr_len; - int start = 0; - int stop = INT_MAX; - - Py_Get6Args("s#s#|ii:setsplit", - tx,tx_len,setstr,setstr_len,start,stop); - - return mxTextTools_SetSplit(tx, tx_len, - setstr, setstr_len, - start, stop); - onError: - return NULL; -} - -Py_C_Function( mxTextTools_setsplitx, - "setsplitx(text,set,start=0,stop=len(text))\n\n" - "Split text[start:stop] into substrings using set, so\n" - "that every second entry consists only of characters in set.\n" - "set must be a string obtained with set()\n" - "DEPRECATED: use CharSet().splitx() instead." - ) -{ - int text_len = INT_MAX; - int start = 0; - char *tx; - int tx_len; - char *setstr; - int setstr_len; - - Py_Get6Args("s#s#|ii:setsplitx", - tx,tx_len,setstr,setstr_len,start,text_len); - - return mxTextTools_SetSplitX(tx, tx_len, - setstr, setstr_len, - start, text_len); - onError: - return NULL; -} - -Py_C_Function( mxTextTools_upper, - "upper(text)\n\n" - "Return text converted to upper case.") -{ - PyObject *text; - - Py_GetArgObject(text); - if (PyString_Check(text)) - return mxTextTools_Upper(text); -#ifdef HAVE_UNICODE - else if (PyUnicode_Check(text)) - return mxTextTools_UnicodeUpper(text); -#endif - else - Py_Error(PyExc_TypeError, - "expected string or unicode"); - - onError: - return NULL; -} - -Py_C_Function( mxTextTools_lower, - "lower(text)\n\n" - "Return text converted to lower case.") -{ - PyObject *text; - - Py_GetArgObject(text); - if (PyString_Check(text)) - return mxTextTools_Lower(text); -#ifdef HAVE_UNICODE - else if (PyUnicode_Check(text)) - return mxTextTools_UnicodeLower(text); -#endif - else - Py_Error(PyExc_TypeError, - "expected string or unicode"); - - onError: - return NULL; -} - -Py_C_Function( mxTextTools_str2hex, - "str2hex(text)\n\n" - "Return text converted to a string consisting of two byte\n" - "HEX values.") -{ - char *str; - int len; - - Py_Get2Args("s#",str,len); - - return mxTextTools_HexStringFromString(str,len); - - onError: - return NULL; -} - -Py_C_Function( mxTextTools_hex2str, - "hex2str(text)\n\n" - "Return text interpreted as two byte HEX values converted\n" - "to a string.") -{ - char *str; - int len; - - Py_Get2Args("s#",str,len); - - return mxTextTools_StringFromHexString(str,len); - - onError: - return NULL; -} - -Py_C_Function( mxTextTools_isascii, - "isascii(text,start=0,stop=len(text))\n\n" - "Return 1/0 depending on whether text only contains ASCII\n" - "characters." 
- ) -{ - PyObject *text; - int start=0, stop = INT_MAX; - int rc; - - Py_GetArgObject(text); - rc = mxTextTools_IsASCII(text, start, stop); - if (rc < 0) - goto onError; - return PyInt_FromLong(rc); - - onError: - return NULL; -} - -/* --- module init --------------------------------------------------------- */ - -/* Python Method Table */ - -static PyMethodDef Module_methods[] = -{ - Py_MethodWithKeywordsListEntry("tag",mxTextTools_tag), - Py_MethodListEntry("join",mxTextTools_join), - Py_MethodListEntry("cmp",mxTextTools_cmp), - Py_MethodListEntry("joinlist",mxTextTools_joinlist), - Py_MethodListEntry("set",mxTextTools_set), - Py_MethodListEntry("setfind",mxTextTools_setfind), - Py_MethodListEntry("setsplit",mxTextTools_setsplit), - Py_MethodListEntry("setsplitx",mxTextTools_setsplitx), - Py_MethodListEntry("setstrip",mxTextTools_setstrip), - Py_MethodWithKeywordsListEntry("TextSearch",mxTextSearch_TextSearch), - Py_MethodListEntry("CharSet",mxCharSet_CharSet), - Py_MethodListEntry("TagTable",mxTagTable_TagTable), -#ifdef HAVE_UNICODE - Py_MethodListEntry("UnicodeTagTable",mxTagTable_UnicodeTagTable), -#endif - Py_MethodListEntrySingleArg("upper",mxTextTools_upper), - Py_MethodListEntrySingleArg("lower",mxTextTools_lower), - Py_MethodListEntry("charsplit",mxTextTools_charsplit), - Py_MethodListEntry("splitat",mxTextTools_splitat), - Py_MethodListEntry("suffix",mxTextTools_suffix), - Py_MethodListEntry("prefix",mxTextTools_prefix), - Py_MethodListEntry("hex2str",mxTextTools_hex2str), - Py_MethodListEntry("str2hex",mxTextTools_str2hex), - Py_MethodListEntrySingleArg("isascii",mxTextTools_isascii), - {NULL,NULL} /* end of list */ -}; - -/* Cleanup function */ -static -void mxTextToolsModule_Cleanup(void) -{ - mxTextTools_TagTables = NULL; - - /* Reset mxTextTools_Initialized flag */ - mxTextTools_Initialized = 0; -} - -MX_EXPORT(void) - initmxTextTools(void) -{ - PyObject *module, *moddict; - - if (mxTextTools_Initialized) - Py_Error(PyExc_SystemError, - "can't initialize "MXTEXTTOOLS_MODULE" more than once"); - - /* Init type objects */ - PyType_Init(mxTextSearch_Type); -#ifdef MXFASTSEARCH - PyType_Init(mxFS_Type); -#endif - PyType_Init(mxCharSet_Type); - PyType_Init(mxTagTable_Type); - - /* create module */ - module = Py_InitModule4(MXTEXTTOOLS_MODULE, /* Module name */ - Module_methods, /* Method list */ - Module_docstring, /* Module doc-string */ - (PyObject *)NULL, /* always pass this as *self */ - PYTHON_API_VERSION); /* API Version */ - if (!module) - goto onError; - - /* Init TagTable cache */ - if ((mxTextTools_TagTables = PyDict_New()) == NULL) - goto onError; - - /* Register cleanup function */ - if (Py_AtExit(mxTextToolsModule_Cleanup)) - /* XXX what to do if we can't register that function ??? */; - - /* Add some symbolic constants to the module */ - moddict = PyModule_GetDict(module); - PyDict_SetItemString(moddict, - "__version__", - PyString_FromString(VERSION)); - - mx_ToUpper = mxTextTools_ToUpper(); - PyDict_SetItemString(moddict, - "to_upper", - mx_ToUpper); - - mx_ToLower = mxTextTools_ToLower(); - PyDict_SetItemString(moddict, - "to_lower", - mx_ToLower); - - /* Let the tag table cache live in the module dictionary; we just - keep a weak reference in mxTextTools_TagTables around. 
*/ - PyDict_SetItemString(moddict, - "tagtable_cache", - mxTextTools_TagTables); - Py_DECREF(mxTextTools_TagTables); - - insint(moddict, "BOYERMOORE", MXTEXTSEARCH_BOYERMOORE); - insint(moddict, "FASTSEARCH", MXTEXTSEARCH_FASTSEARCH); - insint(moddict, "TRIVIAL", MXTEXTSEARCH_TRIVIAL); - - /* Init exceptions */ - if ((mxTextTools_Error = insexc(moddict, - "Error", - PyExc_StandardError)) == NULL) - goto onError; - - /* Type objects */ - Py_INCREF(&mxTextSearch_Type); - PyDict_SetItemString(moddict, "TextSearchType", - (PyObject *)&mxTextSearch_Type); - Py_INCREF(&mxCharSet_Type); - PyDict_SetItemString(moddict, "CharSetType", - (PyObject *)&mxCharSet_Type); - Py_INCREF(&mxTagTable_Type); - PyDict_SetItemString(moddict, "TagTableType", - (PyObject *)&mxTagTable_Type); - - /* Tag Table command symbols (these will be exposed via - simpleparse.stt.TextTools.Constants.TagTables) */ - insint(moddict, "_const_AllIn", MATCH_ALLIN); - insint(moddict, "_const_AllNotIn", MATCH_ALLNOTIN); - insint(moddict, "_const_Is", MATCH_IS); - insint(moddict, "_const_IsIn", MATCH_ISIN); - insint(moddict, "_const_IsNot", MATCH_ISNOTIN); - insint(moddict, "_const_IsNotIn", MATCH_ISNOTIN); - - insint(moddict, "_const_Word", MATCH_WORD); - insint(moddict, "_const_WordStart", MATCH_WORDSTART); - insint(moddict, "_const_WordEnd", MATCH_WORDEND); - - insint(moddict, "_const_AllInSet", MATCH_ALLINSET); - insint(moddict, "_const_IsInSet", MATCH_ISINSET); - insint(moddict, "_const_AllInCharSet", MATCH_ALLINCHARSET); - insint(moddict, "_const_IsInCharSet", MATCH_ISINCHARSET); - - insint(moddict, "_const_Fail", MATCH_FAIL); - insint(moddict, "_const_Jump", MATCH_JUMP); - insint(moddict, "_const_EOF", MATCH_EOF); - insint(moddict, "_const_Skip", MATCH_SKIP); - insint(moddict, "_const_Move", MATCH_MOVE); - - insint(moddict, "_const_JumpTarget", MATCH_JUMPTARGET); - - insint(moddict, "_const_sWordStart", MATCH_SWORDSTART); - insint(moddict, "_const_sWordEnd", MATCH_SWORDEND); - insint(moddict, "_const_sFindWord", MATCH_SFINDWORD); - insint(moddict, "_const_NoWord", MATCH_NOWORD); - - insint(moddict, "_const_Call", MATCH_CALL); - insint(moddict, "_const_CallArg", MATCH_CALLARG); - - insint(moddict, "_const_Table", MATCH_TABLE); - insint(moddict, "_const_SubTable", MATCH_SUBTABLE); - insint(moddict, "_const_TableInList", MATCH_TABLEINLIST); - insint(moddict, "_const_SubTableInList", MATCH_SUBTABLEINLIST); - - insint(moddict, "_const_Loop", MATCH_LOOP); - insint(moddict, "_const_LoopControl", MATCH_LOOPCONTROL); - - /* Tag Table command flags */ - insint(moddict, "_const_CallTag", MATCH_CALLTAG); - insint(moddict, "_const_AppendToTagobj", MATCH_APPENDTAG); - insint(moddict, "_const_AppendTagobj", MATCH_APPENDTAGOBJ); - insint(moddict, "_const_AppendMatch", MATCH_APPENDMATCH); - insint(moddict, "_const_LookAhead", MATCH_LOOKAHEAD); - - /* Tag Table argument integers */ - insint(moddict, "_const_To", MATCH_JUMP_TO); - insint(moddict, "_const_MatchOk", MATCH_JUMP_MATCHOK); - insint(moddict, "_const_MatchFail", MATCH_JUMP_MATCHFAIL); - insint(moddict, "_const_ToEOF", MATCH_MOVE_EOF); - insint(moddict, "_const_ToBOF", MATCH_MOVE_BOF); - insint(moddict, "_const_Here", MATCH_FAIL_HERE); - - insint(moddict, "_const_ThisTable", MATCH_THISTABLE); - - insint(moddict, "_const_Break", MATCH_LOOPCONTROL_BREAK); - insint(moddict, "_const_Reset", MATCH_LOOPCONTROL_RESET); - - DPRINTF("sizeof(string_charset)=%i bytes\n", sizeof(string_charset)); -#ifdef HAVE_UNICODE - DPRINTF("sizeof(unicode_charset)=%i bytes\n", sizeof(unicode_charset)); -#endif 
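The _const_* integers registered above are re-exported under the friendlier command names used throughout the rest of this patch (AllIn, IsIn, Word, Table, CallTag, AppendMatch and so on) via simpleparse.stt.TextTools.Constants.TagTables. A minimal usage sketch, assuming a working simpleparse.stt build; the input text and expected result mirror tests/mx_low.py further down:

    from simpleparse.stt.TextTools import tag, AllIn

    # A tag-table entry is (tagobj, command, match argument,
    #                       jump-if-no-match[, jump-if-match]).
    table = (
        ("ab", AllIn, "ab", 0),   # match a run of 'a'/'b' characters
    )

    # tag() returns (success, taglist, next_position); the run covers the
    # whole input here, so the result is (1, [("ab", 0, 6, None)], 6).
    result = tag("abbaab", table, 0)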
- - /* We are now initialized */ - mxTextTools_Initialized = 1; - - onError: - /* Check for errors and report them */ - if (PyErr_Occurred()) - Py_ReportModuleInitError(MXTEXTTOOLS_MODULE); - return; -} diff -Nru simpleparse-2.1.0a1/stt/TextTools/mxTextTools/mxTextTools.def simpleparse-2.2.0/stt/TextTools/mxTextTools/mxTextTools.def --- simpleparse-2.1.0a1/stt/TextTools/mxTextTools/mxTextTools.def 2006-02-18 23:40:03.000000000 +0000 +++ simpleparse-2.2.0/stt/TextTools/mxTextTools/mxTextTools.def 1970-01-01 00:00:00.000000000 +0000 @@ -1,2 +0,0 @@ -EXPORTS - initmxTextTools diff -Nru simpleparse-2.1.0a1/stt/TextTools/mxTextTools/mxTextTools.h simpleparse-2.2.0/stt/TextTools/mxTextTools/mxTextTools.h --- simpleparse-2.1.0a1/stt/TextTools/mxTextTools/mxTextTools.h 2006-02-18 23:40:03.000000000 +0000 +++ simpleparse-2.2.0/stt/TextTools/mxTextTools/mxTextTools.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,268 +0,0 @@ -#ifndef MXTEXTTOOLS_H -#define MXTEXTTOOLS_H -/* - mxTextTools -- Fast text manipulation routines - - Copyright (c) 2000, Marc-Andre Lemburg; mailto:mal@lemburg.com - Copyright (c) 2000-2002, eGenix.com Software GmbH; mailto:info@egenix.com -*/ - -/* The extension's name; must be the same as the init function's suffix */ -#define MXTEXTTOOLS_MODULE "mxTextTools" - -#include "mxbmse.h" -#ifdef MXFASTSEARCH -# include "private/mxfse.h" -#endif - -/* Include generic mx extension header file */ -#include "mxh.h" - -#ifdef MX_BUILDING_MXTEXTTOOLS -# define MXTEXTTOOLS_EXTERNALIZE MX_EXPORT -#else -# define MXTEXTTOOLS_EXTERNALIZE MX_IMPORT -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -/* --- Text Search Object ---------------------------------------*/ - -/* Algorithm values */ -#define MXTEXTSEARCH_BOYERMOORE 0 -#define MXTEXTSEARCH_FASTSEARCH 1 -#define MXTEXTSEARCH_TRIVIAL 2 - -typedef struct { - PyObject_HEAD - PyObject *match; /* Match string object */ - PyObject *translate; /* Translate string object or NULL */ - int algorithm; /* Algorithm to be used */ - void *data; /* Internal data used by the algorithm or - NULL */ -} mxTextSearchObject; - -MXTEXTTOOLS_EXTERNALIZE(PyTypeObject) mxTextSearch_Type; - -#define mxTextSearch_Check(v) \ - (((mxTextSearchObject *)(v))->ob_type == &mxTextSearch_Type) - -/* Exporting these APIs for mxTextTools internal use only ! */ - -extern -int mxTextSearch_MatchLength(PyObject *self); - -extern -int mxTextSearch_SearchBuffer(PyObject *self, - char *text, - int start, - int stop, - int *sliceleft, - int *sliceright); - -#ifdef HAVE_UNICODE -extern -int mxTextSearch_SearchUnicode(PyObject *self, - Py_UNICODE *text, - int start, - int stop, - int *sliceleft, - int *sliceright); -#endif - -/* --- Character Set Object -------------------------------------*/ - -/* Mode values */ -#define MXCHARSET_8BITMODE 0 -#define MXCHARSET_UCS2MODE 1 -#define MXCHARSET_UCS4MODE 2 - -typedef struct { - PyObject_HEAD - PyObject *definition; /* Character set definition */ - int mode; /* Operation mode: - 0 - 8-bit character lookup - 1 - UCS-2 Unicode lookup - 2 - UCS-4 Unicode lookup - */ - void *lookup; /* Lookup table */ -} mxCharSetObject; - -MXTEXTTOOLS_EXTERNALIZE(PyTypeObject) mxCharSet_Type; - -#define mxCharSet_Check(v) \ - (((mxCharSetObject *)(v))->ob_type == &mxCharSet_Type) - - -/* Exporting these APIs for mxTextTools internal use only ! 
*/ - -extern -int mxCharSet_ContainsChar(PyObject *self, - register unsigned char ch); - -#ifdef HAVE_UNICODE -extern -int mxCharSet_ContainsUnicodeChar(PyObject *self, - register Py_UNICODE ch); -#endif - -extern -int mxCharSet_Match(PyObject *self, - PyObject *text, - int start, - int stop, - int direction); - -/* --- Tag Table Object -----------------------------------------*/ - -typedef struct { - PyObject *tagobj; /* Tag object to assign, call, - append, etc. or NULL */ - int cmd; /* Command integer */ - int flags; /* Command flags */ - PyObject *args; /* Command arguments */ - int jne; /* Non-match jump offset */ - int je; /* Match jump offset */ -} mxTagTableEntry; - -#define MXTAGTABLE_STRINGTYPE 0 -#define MXTAGTABLE_UNICODETYPE 1 - -typedef struct { - PyObject_VAR_HEAD - PyObject *definition; /* Reference to the original - table definition or NULL; - needed for caching */ - int tabletype; /* Type of compiled table: - 0 - 8-bit string args - 1 - Unicode args */ - mxTagTableEntry entry[1]; /* Variable length array of - mxTagTableEntry fields; - ob_size gives the number of - allocated entries. */ -} mxTagTableObject; - -MXTEXTTOOLS_EXTERNALIZE(PyTypeObject) mxTagTable_Type; - -#define mxTagTable_Check(v) \ - (((mxTagTableObject *)(v))->ob_type == &mxTagTable_Type) - -#define mxTagTable_Type(v) \ - (((mxTagTableObject *)(v))->tabletype) -#define mxTagTable_Definition(v) \ - (((mxTagTableObject *)(v))->definition) - -/* Exporting these APIs for mxTextTools internal use only ! */ -extern -PyObject *mxTagTable_New(PyObject *definition, - int tabletype, - int cacheable); - -/* --- Tagging Engine -------------------------------------------*/ - -/* Exporting these APIs for mxTextTools internal use only ! */ - -/* mxTextTools_TaggingEngine(): a table driven parser engine - - - return codes: rc = 2: match ok; rc = 1: match failed; rc = 0: error - - doesn't check type of passed arguments ! - - doesn't increment reference counts of passed objects ! 
-*/ - -extern -int mxTextTools_TaggingEngine(PyObject *textobj, - int text_start, - int text_stop, - mxTagTableObject *table, - PyObject *taglist, - PyObject *context, - int *next); - -extern -int mxTextTools_UnicodeTaggingEngine(PyObject *textobj, - int text_start, - int text_stop, - mxTagTableObject *table, - PyObject *taglist, - PyObject *context, - int *next); - -/* Command integers for cmd; see Constants/TagTable.py for details */ - -/* Low-level string matching, using the same simple logic: - - match has to be a string - - they only modify x (the current position in text) -*/ -#define MATCH_ALLIN 11 -#define MATCH_ALLNOTIN 12 -#define MATCH_IS 13 -#define MATCH_ISIN 14 -#define MATCH_ISNOTIN 15 - -#define MATCH_WORD 21 -#define MATCH_WORDSTART 22 -#define MATCH_WORDEND 23 - -#define MATCH_ALLINSET 31 -#define MATCH_ISINSET 32 - -#define MATCH_ALLINCHARSET 41 -#define MATCH_ISINCHARSET 42 - -#define MATCH_MAX_LOWLEVEL 99 - -/* Jumps and other low-level special commands */ - -#define MATCH_FAIL 100 -#define MATCH_JUMP MATCH_FAIL - -#define MATCH_EOF 101 -#define MATCH_SKIP 102 -#define MATCH_MOVE 103 - -#define MATCH_JUMPTARGET 104 - -#define MATCH_MAX_SPECIALS 199 - -/* Higher-level string matching */ - -#define MATCH_SWORDSTART 211 -#define MATCH_SWORDEND 212 -#define MATCH_SFINDWORD 213 -#define MATCH_NOWORD MATCH_SWORDSTART - -/* Higher-level special commands */ -#define MATCH_CALL 201 -#define MATCH_CALLARG 202 -#define MATCH_TABLE 203 -#define MATCH_SUBTABLE 207 -#define MATCH_TABLEINLIST 204 -#define MATCH_SUBTABLEINLIST 208 -#define MATCH_LOOP 205 -#define MATCH_LOOPCONTROL 206 - -/* Special argument integers */ -#define MATCH_JUMP_TO 0 -#define MATCH_JUMP_MATCHOK 1000000 -#define MATCH_JUMP_MATCHFAIL -1000000 -#define MATCH_MOVE_EOF -1 -#define MATCH_MOVE_BOF 0 -#define MATCH_FAIL_HERE 1 -#define MATCH_THISTABLE 999 -#define MATCH_LOOPCONTROL_BREAK 0 -#define MATCH_LOOPCONTROL_RESET -1 - -/* Flags set in cmd (>=256) */ -#define MATCH_CALLTAG (1 << 8) -#define MATCH_APPENDTAG (1 << 9) -#define MATCH_APPENDTAGOBJ (1 << 10) -#define MATCH_APPENDMATCH (1 << 11) -#define MATCH_LOOKAHEAD (1 << 12) - -/* EOF */ -#ifdef __cplusplus -} -#endif -#endif diff -Nru simpleparse-2.1.0a1/stt/TextTools/mxTextTools/recursecommands.h simpleparse-2.2.0/stt/TextTools/mxTextTools/recursecommands.h --- simpleparse-2.1.0a1/stt/TextTools/mxTextTools/recursecommands.h 2006-02-18 23:40:03.000000000 +0000 +++ simpleparse-2.2.0/stt/TextTools/mxTextTools/recursecommands.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,119 +0,0 @@ -/* recursive tag-table commands */ - -case MATCH_TABLE: -case MATCH_SUBTABLE: -case MATCH_TABLEINLIST: -case MATCH_SUBTABLEINLIST: - { - PyObject * newTable = NULL; - - if (childReturnCode == NULL_CODE ) { - /* haven't yet parsed the sub-table match */ - switch (command) { - /* determine the table to which we will transfer control */ - case MATCH_TABLE: - case MATCH_SUBTABLE: - { - /* switch to either current tag table or a compiled sub-table */ - if (PyInt_Check(match) && - PyInt_AS_LONG(match) == MATCH_THISTABLE) { - newTable = (PyObject *)table; - } else { - newTable = match; - } - - /* XXX Fix to auto-compile that match argument - - Should also test that it _is_ a compiled TagTable, - rather than that it _isn't_ a tuple? 
- */ - if (!mxTagTable_Check(newTable)) { - childReturnCode = ERROR_CODE; - errorType = PyExc_TypeError; - errorMessage = PyString_FromFormat( - "Match argument must be compiled TagTable: was a %.50s", - newTable->ob_type->tp_name - ); - } else { - /* we decref in POP */ - Py_INCREF(newTable); - } - break; - } - case MATCH_TABLEINLIST: - case MATCH_SUBTABLEINLIST: - { - /* switch to explicitly specified table in a list (compiling if necessary) */ - - - newTable = PyList_GetItem( - PyTuple_GET_ITEM(match, 0), - PyInt_AS_LONG( - PyTuple_GET_ITEM(match, 1) - ) - ); - if (newTable == NULL) { - childReturnCode = ERROR_CODE; - errorType = PyExc_TypeError; - errorMessage = PyString_FromFormat( - "Tag table entry %i: Could not find target table in list of tables", - index - ); - } else { - if (mxTagTable_Check(newTable)) { - /* This is decref'd in POP */ - Py_INCREF(newTable); - } else { - /* These tables are considered to be - cacheable. */ - newTable = mxTagTable_New(newTable, - table->tabletype, - 1); - /* why didn't we increment the refcount here? does New give us a new ref? */ - if (newTable == NULL) { - childReturnCode = ERROR_CODE; - errorType = PyExc_TypeError; - errorMessage = PyString_FromFormat( - "Tag table entry %i: Could not compile target table", - index - ); - } - } - } - break; - } - - } - - if (childReturnCode == NULL_CODE) { - /* we found a valid newTable */ - PyObject *subtags = NULL; - - if (taglist != Py_None && command != MATCH_SUBTABLE && command != MATCH_SUBTABLEINLIST) { - /* Create a new list for use as subtaglist - - Will be decref'd by the child-finished clause if necessary - */ - subtags = PyList_New(0); - if (subtags == NULL) { - childReturnCode = ERROR_CODE; - errorType = PyExc_MemoryError; - } - } else { - /* Use taglist as subtaglist - - We don't incref it as we check explicitly for whether - it's the same when we go to decref (during childReturnCode - handling) - */ - subtags = taglist; - } - - /* match other table */ - PUSH_STACK( newTable, subtags ); - RESET_TABLE_VARIABLES - } - } - break; - } - diff -Nru simpleparse-2.1.0a1/stt/TextTools/mxTextTools/Setup.in simpleparse-2.2.0/stt/TextTools/mxTextTools/Setup.in --- simpleparse-2.1.0a1/stt/TextTools/mxTextTools/Setup.in 2006-02-18 23:40:03.000000000 +0000 +++ simpleparse-2.2.0/stt/TextTools/mxTextTools/Setup.in 1970-01-01 00:00:00.000000000 +0000 @@ -1,15 +0,0 @@ -# Build extensions shared: -*shared* -# - -# Text processing tools -mxTextTools \ -# -# To enable debugging support uncomment the next line (will print information -# to various .log files if Python is run with -d option): -# -DMAL_DEBUG_WITH_PYTHON \ -# -# To enable the Fast Search Algorithm, also add the next line: -# -DMXFASTSEARCH private/mxfse.c \ -# Always leave this line uncommented: - mxTextTools.c mxte.c mxbmse.c diff -Nru simpleparse-2.1.0a1/stt/TextTools/mxTextTools/speccommands.h simpleparse-2.2.0/stt/TextTools/mxTextTools/speccommands.h --- simpleparse-2.1.0a1/stt/TextTools/mxTextTools/speccommands.h 2006-02-18 23:40:03.000000000 +0000 +++ simpleparse-2.2.0/stt/TextTools/mxTextTools/speccommands.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,101 +0,0 @@ -/* "Special" commands code fragment - - The contract here is: - - The commands may alter any of the tag-specific variables - - errors may be indicated if encountered in childReturnCode and the error* variables - - setting childReturnCode equal to FAILURE_CODE declares that the read head has not moved - - childReturnCode must be set (or default "you have to move forward to match" semantics 
are used) - -*/ -/* doesn't there need to be a check for integer arguments - that the value is an integer? - Or does the compiler do that now */ - - - - case MATCH_FAIL: /* == MATCH_JUMP */ - /* dumb question, what is MATCH_JUMP supposed to do? */ - childReturnCode = FAILURE_CODE; - break; - - case MATCH_SKIP: - /* Argh, what to do when moves past buffer? - - Where do we check that this is still in-bounds? - documented as always succeeding, but results in - result-tuples with negative or out-of-range values - in current code. - Can't do: - if (childPosition < sliceleft) { - childPosition = 0; - } else if (childPosition > sliceright) { - childPosition = sliceright; - } - because we might have another move, or an EOF - or whatever coming up. - - Marc-André want's these conditions: - childPosition < 0 { # (not sliceleft!) - raise TypeError: Tag Table entry %(index): moved/skipped beyond start of text - } and no check for > right or beyond end of buffer... - */ - DPRINTF("\nSkip %li characters\n" - " in string = '%.40s'\n", - PyInt_AS_LONG(match),text+childPosition); - childPosition += PyInt_AS_LONG(match); - childReturnCode = SUCCESS_CODE; - break; - - case MATCH_MOVE: - /* same potential out-of-bounds issue as with skip */ - childPosition = PyInt_AS_LONG(match); - if (childPosition < 0) { - /* Relative to end of the slice */ - childPosition += sliceright + 1; - } else { - /* Relative to beginning of the slice */ - childPosition += sliceleft; - } - DPRINTF("\nMove to position %i \n" - " string = '%.40s'\n", - childPosition,text+childPosition); - childReturnCode = SUCCESS_CODE; - break; - - case MATCH_EOF: - DPRINTF("\nEOF at position %i ? \n" - " string = '%.40s'\n", - childPosition,text+childPosition); - - if (sliceright > childPosition) { /* not matched */ - childReturnCode = FAILURE_CODE; - } else { - /* I don't see why this would necessarily be the end of the parsing run, after all - you might want to match EOF, then back up X characters? The documentation doesn't - mention anything about such a restriction. - - Approach here seems to match documentation functionality - but still suffers the out-of-range problems seen in move - and skip commands as well. - */ - childReturnCode = SUCCESS_CODE; - childPosition = sliceright; - childStart = sliceright; - } - break; - - - case MATCH_JUMPTARGET: - /* note: currently this can report a value, though I don't think - that was intended originally. I see it as useful because it lets - you enter a flag in the results table just by specifying a non-None - tagobj */ - /* null operation */ - DPRINTF("\nJumpTarget '%.40s' (skipped)\n", - PyString_AsString(match)); - childReturnCode = SUCCESS_CODE; - break; diff -Nru simpleparse-2.1.0a1/stt/TextTools/README simpleparse-2.2.0/stt/TextTools/README --- simpleparse-2.1.0a1/stt/TextTools/README 2006-02-18 23:34:33.000000000 +0000 +++ simpleparse-2.2.0/stt/TextTools/README 1970-01-01 00:00:00.000000000 +0000 @@ -1,4 +0,0 @@ -Please see the documentation in the Doc/ subdirectory for -further information on installation and usage. - -Marc-Andre Lemburg, mal@lemburg.com diff -Nru simpleparse-2.1.0a1/stt/TextTools/TextTools.py simpleparse-2.2.0/stt/TextTools/TextTools.py --- simpleparse-2.1.0a1/stt/TextTools/TextTools.py 2006-02-18 23:33:56.000000000 +0000 +++ simpleparse-2.2.0/stt/TextTools/TextTools.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,806 +0,0 @@ -""" mxTextTools - A tools package for fast text processing. 
- - Copyright (c) 2000, Marc-Andre Lemburg; mailto:mal@lemburg.com - Copyright (c) 2000-2003, eGenix.com Software GmbH; mailto:info@egenix.com - See the documentation for further information on copyrights, - or contact the author. All Rights Reserved. -""" -import string,types - -# -# import the C module and the version number -# -from mxTextTools import * -from mxTextTools import __version__ - -# -# import the symbols needed to write tag tables -# -from Constants.TagTables import * - -# -# import the some handy character sets -# -from Constants.Sets import * - -# -# format and print tables, taglists and joinlists: -# -def format_entry(table,i, - - TupleType=types.TupleType): - - """ Returns a pp-formatted tag table entry as string - """ - e = table[i] - jne = 0 - je = 1 - t,c,m = e[:3] - if len(e)>3: jne = e[3] - if len(e)>4: je = e[4] - flags,cmd = divmod(c,256) - c = id2cmd[cmd] - if type(m) == TupleType and c in ('Table','SubTable'): - m = '' - elif m == None: - m = 'Here/To' - else: - m = repr(m) - if len(m) > 17: - m = m[:17]+'...' - return '%-15.15s : %-30s : jne=%+i : je=%+i' % \ - (repr(t),'%-.15s : %s'%(c,m),jne,je) - -def format_table(table,i=-1): - - """ Returns a pp-formatted version of the tag table as string """ - - l = [] - for j in range(len(table)): - if i == j: - l.append('--> '+format_entry(table,j)) - else: - l.append(' '+format_entry(table,j)) - return string.join(l,'\n')+'\n' - -def print_tagtable(table): - - """ Print the tag table - """ - print format_table(table) - -def print_tags(text,tags,indent=0): - - """ Print the taglist tags for text using the given indent level - """ - for tag,l,r,subtags in tags: - tagname = repr(tag) - if len(tagname) > 20: - tagname = tagname[:20] + '...' - target = repr(text[l:r]) - if len(target) > 60: - target = target[:60] + '...' - if subtags == None: - print ' '+indent*' |',tagname,': ',target,(l,r) - else: - print ' '+indent*' |',tagname,': ',target,(l,r) - print_tags(text,subtags,indent+1) - -def print_joinlist(joins,indent=0, - - StringType=types.StringType): - - """ Print the joinlist joins using the given indent level - """ - for j in joins: - if type(j) == StringType: - text = repr(j) - if len(text) > 40: - text = text[:40] + '...' - print ' '+indent*' |',text,' (len = %i)' % len(j) - else: - text = j[0] - l,r = j[1:3] - text = repr(text[l:r]) - if len(text) > 40: - text = text[:40] + '...' - print ' '+indent*' |',text,' (len = %i)' % (r-l),(l,r) - -def normlist(jlist, - - StringType=types.StringType): - - """ Return a normalized joinlist. - - All tuples in the joinlist are turned into real strings. The - resulting list is a equivalent copy of the joinlist only - consisting of strings. 
- - """ - l = [''] * len(jlist) - for i in range(len(jlist)): - entry = jlist[i] - if type(entry) == StringType: - l[i] = entry - else: - l[i] = entry[0][entry[1]:entry[2]] - return l - -# -# aid for matching from a list of words -# -def _lookup_dict(l,index=0): - - d = {} - for w in l: - c = w[index] - if d.has_key(c): - d[c].append(w) - else: - d[c] = [w] - return d - -def word_in_list(l): - - """ Creates a lookup table that matches the words in l - """ - t = [] - d = _lookup_dict(l) - keys = d.keys() - if len(keys) < 18: # somewhat arbitrary bound - # fast hint for small sets - t.append((None,IsIn,string.join(d.keys(),''))) - t.append((None,Skip,-1)) - # test groups - for c, group in d.items(): - t.append(None) # hint will be filled in later - i = len(t)-1 - for w in group: - t.append((None,Word,w[1:],+1,MatchOk)) - t.append((None,Fail,Here)) - # add hint - t[i] = (None,Is,c,len(t)-i) - t.append((None,Fail,Here)) - return tuple(t) - -# -# Extra stuff useful in combination with the C functions -# - -def replace(text,what,with,start=0,stop=None, - - SearchObject=TextSearch,join=join,joinlist=joinlist,tag=tag, - string_replace=string.replace,type=type, - StringType=types.StringType): - - """A fast replacement for string.replace. - - what can be given as string or search object. - - This function is a good example for the AppendTagobj-flag usage - (the taglist can be used directly as joinlist). - - """ - if type(what) is not TextSearchType: - so = SearchObject(what) - else: - so = what - what = so.match - if stop is None: - if start == 0 and len(what) < 2: - return string_replace(text,what,with) - stop = len(text) - t = ((text,sWordStart,so,+2), - # Found something, replace and continue searching - (with,Skip+AppendTagobj,len(what),-1,-1), - # Rest of text - (text,Move,ToEOF) - ) - found,taglist,last = tag(text,t,start,stop) - if not found: - return text - return join(taglist) - -# Alternative (usually slower) versions using different techniques: - -def _replace2(text,what,with,start=0,stop=None, - - join=join,joinlist=joinlist,tag=tag, - TextSearchType=TextSearchType,TextSearch=TextSearch): - - """Analogon to string.replace; returns a string with all occurences - of what in text[start:stop] replaced by with. - - This version uses a one entry tag-table and a - Boyer-Moore-Search-object. what can be a string or a - TextSearch search object. It's faster than string.replace in - those cases, where the what-string gets long and/or many - replacements are found; faster meaning from a few percent up to - many times as fast - - start and stop define the slice of text to work in. stop - defaults to len(text). 
- - """ - if stop is None: - stop = len(text) - if type(what) is not TextSearchType: - what=TextSearch(what) - t = ((with,sFindWord,what,+1,+0),) - found,taglist,last = tag(text,t,start,stop) - if not found: - return text - return join(joinlist(text,taglist)) - -def _replace3(text,what,with, - - join=string.join,TextSearch=TextSearch, - TextSearchType=TextSearchType): - - if type(what) is not TextSearchType: - what=TextSearch(what) - slices = what.findall(text) - if not slices: - return text - l = [] - x = 0 - for left,right in slices: - l.append(text[x:left] + with) - x = right - l.append(text[x:]) - return join(l,'') - -def _replace4(text,what,with, - - join=join,joinlist=joinlist,tag=tag,TextSearch=TextSearch, - TextSearchType=TextSearchType): - - if type(what) is not TextSearchType: - what=TextSearch(what) - slices = what.findall(text) - if not slices: - return text - repl = [None]*len(slices) - for i in range(len(slices)): - repl[i] = (with,)+slices[i] - return join(joinlist(text,repl)) - -def multireplace(text,replacements,start=0,stop=None, - - join=join,joinlist=joinlist): - - """ Apply multiple replacement to a text at once. - - replacements must be list of tuples (replacement, left, - right). It is used to replace the slice text[left:right] with - the string replacement. - - Note that the replacements do not affect one another. Indices - always refer to the original text string. - - Replacements must not overlap. Otherwise a ValueError is - raised. - - """ - if stop is not None: - return join(joinlist(text,replacements,start,stop)) - else: - return join(joinlist(text,replacements,start)) - -def find(text,what,start=0,stop=None, - - SearchObject=TextSearch): - - """ A faster replacement for string.find(). - - Uses a search object for the task. Returns the position of the - first occurance of what in text[start:stop]. stop defaults to - len(text). Returns -1 in case no occurance was found. - - """ - if stop is not None: - return SearchObject(what).find(text,start,stop) - else: - return SearchObject(what).find(text,start) - -def findall(text,what,start=0,stop=None, - - SearchObject=TextSearch): - - """ Find all occurances of what in text. - - Uses a search object for the task. Returns a list of slice - tuples (l,r) marking the all occurances in - text[start:stop]. stop defaults to len(text). Returns an - empty list in case no occurance was found. - - """ - if stop is not None: - return SearchObject(what).findall(text,start,stop) - else: - return SearchObject(what).findall(text,start) - -def split(text,sep,start=0,stop=None,translate=None, - - SearchObject=TextSearch): - - """ A faster replacement for string.split(). - - Uses a search object for the task. Returns the result of - cutting the text[start:stop] string into snippets at every sep - occurance in form of a list of substrings. translate is passed - to the search object as translation string if given. - - XXX Undocumented. 
- - """ - if translate: - so = SearchObject(sep,translate) - else: - so = SearchObject(sep) - if stop: - cuts = so.findall(text,start,stop) - else: - cuts = so.findall(text,start) - l = 0 - list = [] - append = list.append - for left,right in cuts: - append(text[l:left]) - l = right - append(text[l:]) - return list - -# helper for tagdict -def _tagdict(text,dict,prefix,taglist): - - for o,l,r,s in taglist: - pfx = prefix + str(o) - dict[pfx] = text[l:r] - if s: - _tagdict(text,dict,pfx+'.',s) - -def tagdict(text,*args): - - """ Tag a text just like the function tag() and then convert - its output into a dictionary where the tagobjects reference - their respective strings - - This function emulates the interface of tag(). In contrast to - tag() this funtion *does* make copies of the found stings, - though. - - Returns a tuple (rc,tagdict,next) with the same meaning of rc - and next as tag(); tagdict is the new dictionary or None in - case rc is 0. - - """ - rc,taglist,next = apply(tag,(text,)+args) - if not rc: - return (rc,None,next) - d = {} - tagdict = _tagdict - for o,l,r,s in taglist: - pfx = str(o) - d[pfx] = text[l:r] - if s: - tagdict(text,d,pfx+'.',s) - return (rc,d,next) - -def invset(chars): - - """ Return a set with all characters *except* the ones in chars. - """ - return set(chars,0) - -def is_whitespace(text,start=0,stop=None, - - charset=nonwhitespace_charset): - - """ Return 1 iff text[start:stop] only contains whitespace - characters (as defined in Constants/Sets.py), 0 otherwise. - - """ - if stop is None: - stop = len(text) - return (charset.search(text, 1, start, stop) is None) - -def collapse(text,separator=' ', - - join=join,charset=CharSet(newline+whitespace)): - - """ Eliminates newline characters and compresses whitespace - characters into one space. - - The result is a one line text string. Tim Peters will like - this function called with '-' separator ;-) - - """ - return join(charset.split(text), separator) - -_linesplit_table = ( - (None,Is,'\r',+1), - (None,Is,'\n',+1), - ('line',AllInCharSet+AppendMatch,CharSet('^\r\n'),+1,-2), - (None,EOF,Here,+1,MatchOk), - ('empty line',Skip+AppendMatch,0,0,-4), - ) - -def splitlines(text, - - tag=tag,linesplit_table=_linesplit_table): - - """ Split text into a list of single lines. - - The following combinations are considered to be line-ends: - '\r', '\r\n', '\n'; they may be used in any combination. The - line-end indicators are removed from the strings prior to - adding them to the list. - - This function allows dealing with text files from Macs, PCs - and Unix origins in a portable way. - - """ - return tag(text, linesplit_table)[1] - -# Alias for backward compatibility -linesplit = splitlines - -_linecount_table = ( - (None,Is,'\r',+1), - (None,Is,'\n',+1), - ('line',AllInCharSet,CharSet('^\r\n'),+1,-2), - (None,EOF,Here,+1,MatchOk), - ('empty line',Skip,0,0,-4), - ) - -def countlines(text, - - linecount_table=_linecount_table): - - """ Returns the number of lines in text. - - Line ends are treated just like for splitlines() in a - portable way. - - """ - return len(tag(text, linecount_table)[1]) - -_wordsplit_table = ( - (None,AllInCharSet,whitespace_charset,+1), - ('word',AllInCharSet+AppendMatch,nonwhitespace_charset,+1,-1), - (None,EOF,Here,+1,MatchOk), - ) - -def splitwords(text, - - charset=whitespace_charset): - - """ Split text into a list of single words. - - Words are separated by whitespace. The whitespace is stripped - before adding the words to the list. 
- - """ - return charset.split(text) - -# -# Testing and benchmarking -# - -# Taken from my hack.py module: -import time -class _timer: - - """ timer class with a quite obvious interface - - .start() starts a fairly accurate CPU-time timer plus an - absolute timer - - .stop() stops the timer and returns a tuple: the CPU-time in seconds - and the absolute time elapsed since .start() was called - """ - - utime = 0 - atime = 0 - - def start(self, - - clock=time.clock,time=time.time): - - self.atime = time() - self.utime = clock() - - def stop(self, - - clock=time.clock,time=time.time): - - self.utime = clock() - self.utime - self.atime = time() - self.atime - return self.utime,self.atime - - def usertime(self, - - clock=time.clock,time=time.time): - - self.utime = clock() - self.utime - self.atime = time() - self.atime - return self.utime - - def abstime(self, - - clock=time.clock,time=time.time): - - self.utime = clock() - self.utime - self.atime = time() - self.atime - return self.utime - - def __str__(self): - - return '%0.2fu %0.2fa sec.' % (self.utime,self.atime) - -def _bench(file='mxTextTools/mxTextTools.c'): - - def mismatch(orig,new): - print - for i in range(len(orig)): - if orig[i] != new[i]: - break - else: - print 'Length mismatch: orig=%i new=%i' % (len(orig),len(new)) - if len(orig) > len(new): - print 'Missing chars:'+repr(orig[len(new):]) - else: - print 'Excess chars:'+repr(new[len(orig):]) - print - return - print 'Mismatch at offset %i:' % i - print (orig[i-100:i] - + '<- %s != %s ->' % (repr(orig[i]),repr(new[i])) - + orig[i+1:i+100]) - print - - text = open(file).read() - import string - - t = _timer() - print 'Working on a %i byte string' % len(text) - - if 0: - print - print 'Replacing strings' - print '-'*72 - print - for what,with in (('m','M'),('mx','MX'),('mxText','MXTEXT'), - ('hmm','HMM'),('hmmm','HMM'),('hmhmm','HMM')): - print 'Replace "%s" with "%s"' % (what,with) - t.start() - for i in range(100): - rtext = string.replace(text,what,with) - print 'with string.replace:',t.stop(),'sec.' - t.start() - for i in range(100): - ttext = replace(text,what,with) - print 'with tag.replace:',t.stop(),'sec.' - if ttext != rtext: - print 'results are NOT ok !' - print '-'*72 - mismatch(rtext,ttext) - t.start() - for i in range(100): - ttext = _replace2(text,what,with) - print 'with tag._replace2:',t.stop(),'sec.' - if ttext != rtext: - print 'results are NOT ok !' - print '-'*72 - print rtext - t.start() - for i in range(100): - ttext = _replace3(text,what,with) - print 'with tag._replace3:',t.stop(),'sec.' - if ttext != rtext: - print 'results are NOT ok !' - print '-'*72 - print rtext - t.start() - for i in range(100): - ttext = _replace4(text,what,with) - print 'with tag._replace4:',t.stop(),'sec.' - if ttext != rtext: - print 'results are NOT ok !' - print '-'*72 - print rtext - print - - if 0: - print - print 'String lower/upper' - print '-'*72 - print - - op = string.lower - t.start() - for i in range(1000): - op(text) - t.stop() - print ' string.lower:',t - - op = string.upper - t.start() - for i in range(1000): - op(text) - t.stop() - print ' string.upper:',t - - op = upper - t.start() - for i in range(1000): - op(text) - t.stop() - print ' TextTools.upper:',t - - op = lower - t.start() - for i in range(1000): - op(text) - t.stop() - print ' TextTools.lower:',t - - print 'Testing...', - ltext = string.lower(text) - assert ltext == lower(text) - utext = string.upper(text) - assert utext == upper(text) - print 'ok.' 
- - if 0: - print - print 'Joining lists' - print '-'*72 - print - - l = setsplit(text,whitespace_set) - - op = string.join - t.start() - for i in range(1000): - op(l) - t.stop() - print ' string.join:',t - - op = join - t.start() - for i in range(1000): - op(l) - t.stop() - print ' TextTools.join:',t - - op = string.join - t.start() - for i in range(1000): - op(l,' ') - t.stop() - print ' string.join with separator:',t - - op = join - t.start() - for i in range(1000): - op(l,' ') - t.stop() - print ' TextTools.join with separator:',t - - if 0: - print - print 'Creating join lists' - print '-'*72 - print - - repl = [] - for i in range(0,len(text),10): - repl.append((str(i),i,i+1)) - - op = joinlist - t.start() - for i in range(1000): - op(text,repl) - t.stop() - print ' TextTools.joinlist:',t - - if 0: - print - print 'Splitting text' - print '-'*72 - print - - op = string.split - t.start() - for i in range(100): - op(text) - t.stop() - print ' string.split whitespace:',t,'(',len(op(text)),'snippets )' - - op = setsplit - ws = whitespace_set - t.start() - for i in range(100): - op(text,ws) - t.stop() - print ' TextTools.setsplit whitespace:',t,'(',len(op(text,ws)),'snippets )' - - assert string.split(text) == setsplit(text,ws) - - op = string.split - sep = 'a' - t.start() - for i in range(100): - op(text,sep) - t.stop() - print ' string.split at "a":',t,'(',len(op(text,sep)),'snippets )' - - op = split - sep = 'a' - t.start() - for i in range(100): - op(text,sep) - t.stop() - print ' TextTools.split at "a":',t,'(',len(op(text,sep)),'snippets )' - - op = charsplit - sep = 'a' - t.start() - for i in range(100): - op(text,sep) - t.stop() - print ' TextTools.charsplit at "a":',t,'(',len(op(text,sep)),'snippets )' - - op = setsplit - sep = set('a') - t.start() - for i in range(100): - op(text,sep) - t.stop() - print ' TextTools.setsplit at "a":',t,'(',len(op(text,sep)),'snippets )' - - # Note: string.split and setsplit don't work identically ! - - op = string.split - sep = 'int' - t.start() - for i in range(100): - op(text,sep) - t.stop() - print ' string.split at "int":',t,'(',len(op(text,sep)),'snippets )' - - op = split - sep = 'int' - t.start() - for i in range(100): - op(text,sep) - t.stop() - print ' TextTools.split at "int":',t,'(',len(op(text,sep)),'snippets )' - - op = setsplit - sep = set('int') - t.start() - for i in range(100): - op(text,sep) - t.stop() - print ' TextTools.setsplit at "i", "n", "t":',t,'(',len(op(text,sep)),'snippets )' - - op = string.split - sep = 'register' - t.start() - for i in range(100): - op(text,sep) - t.stop() - print ' string.split at "register":',t,'(',len(op(text,sep)),'snippets )' - - op = split - sep = 'register' - t.start() - for i in range(100): - op(text,sep) - t.stop() - print ' TextTools.split at "register":',t,'(',len(op(text,sep)),'snippets )' - -if __name__=='__main__': - _bench() - diff -Nru simpleparse-2.1.0a1/tests/genericvalues.py simpleparse-2.2.0/tests/genericvalues.py --- simpleparse-2.1.0a1/tests/genericvalues.py 2002-07-23 22:56:11.000000000 +0000 +++ simpleparse-2.2.0/tests/genericvalues.py 2015-11-11 18:42:23.000000000 +0000 @@ -7,21 +7,15 @@ "whatever failure position" for failure return values. 
""" -class _NullResults: - def __cmp__( self, other ): - if other == [] or other == None: - return 0 - else: - return -1 - def __repr__( self ): - return "" +class _NullResults(object): + def __eq__(self, other): + return other == [] or other == None + def __repr__( self ): + return "" NullResult = _NullResults() class _AnyInt: - def __cmp__( self, other ): - if type(other) == type(1): - return 0 - else: - return -1 - def __repr__( self ): - return "" + def __eq__(self, other): + return type(other) == type(1) + def __repr__( self ): + return "" AnyInt = _AnyInt() diff -Nru simpleparse-2.1.0a1/tests/__init__.py simpleparse-2.2.0/tests/__init__.py --- simpleparse-2.1.0a1/tests/__init__.py 2002-07-07 10:22:48.000000000 +0000 +++ simpleparse-2.2.0/tests/__init__.py 2015-11-11 19:01:43.000000000 +0000 @@ -5,4 +5,4 @@ configured system. A system with an old copy of mx.TextTools might actually experience an infinite loop or a C stack recursion error. -""" +""" \ No newline at end of file diff -Nru simpleparse-2.1.0a1/tests/mx_flag.py simpleparse-2.2.0/tests/mx_flag.py --- simpleparse-2.1.0a1/tests/mx_flag.py 2006-02-18 23:13:28.000000000 +0000 +++ simpleparse-2.2.0/tests/mx_flag.py 2015-11-11 18:42:23.000000000 +0000 @@ -1,126 +1,125 @@ import unittest, pprint from simpleparse.stt.TextTools import * -import string from simpleparse.stt import TextTools -mxVersion = tuple(string.split( TextTools.__version__, '.')[:3]) +mxVersion = tuple(TextTools.__version__.split('.')[:3]) class MXFlagTests(unittest.TestCase): - """Test Flags for returning/calling different functions on success""" - def doBasicTest(self, table, testvalue, expected, startPosition=0 ): - result = tag( testvalue, table , startPosition) - assert result == expected, '''\n\texpected:%s\n\tgot:%s\n'''%( expected, result ) - ### Return-type handling tests... 
- def testCallTag1( self ): - """Test CallTag""" - def function (parentList, text, l,r,children): - parentList.append( (text[l:r], children) ) - self.doBasicTest( - ( - ( function, AllIn + CallTag, "ab", 0 ), - ), - "abbaabccd", - ( 1,[ - ("abbaab",None), - ],6), - ) - def testCallTag2( self ): - """Test CallTag with a class instance""" - class A: - def __call__(self, parentList, text, l,r,children): - parentList.append( (text[l:r], children) ) - self.doBasicTest( - ( - ( A(), AllIn + CallTag, "ab", 0 ), - ), - "abbaabccd", - ( 1,[ - ("abbaab",None), - ],6), - ) - def testAppendMatch1( self ): - """Test AppendMatch""" - def function (parentList, text, l,r,children): - parentList.append( (text[l:r], children) ) - self.doBasicTest( - ( - ( function, AllIn + AppendMatch, "ab", 0 ), - ), - "abbaabccd", - ( 1,[ - "abbaab", - ],6), - ) - def testAppendToTagobj1( self ): - """Test AppendToTagobj""" - class X: - successful = "" - def append(self, value): - self.successful = value - tag = X() - self.doBasicTest( - ( - ( tag, AllIn + AppendToTagobj, "ab", 0 ), - ), - "abbaabccd", - ( 1,[ - ],6), - ) - assert tag.successful == (None,0,6,None), "TagObject's append was called with %s"%(repr(tag.successful),) - def testAppendToTagobj2( self ): - """Test AppendToTagobj with a simple list""" - - tag = [] - self.doBasicTest( - ( - ( tag, AllIn + AppendToTagobj, "ab", 0 ), - ), - "abbaabccd", - ( 1,[ - ],6), - ) - assert tag[0] == (None,0,6,None), "TagObject's append was called with %s"%(repr(tag.successful),) - - def testAppendTagobj1( self ): - """Test AppendTagobj""" - self.doBasicTest( - ( - ( "Hi there world!", AllIn + AppendTagobj, "ab", 0 ), - ), - "abbaabccd", - ( 1,[ - "Hi there world!", - ],6), - ) - if mxVersion >= ('2','1'): - def testLookAhead1( self ): - """Test LookAhead""" - self.doBasicTest( - ( - ( "whatever", AllIn + LookAhead, "ab", 0 ), - ), - "abbaabccd", - ( 1,[ - ("whatever",0,6,None), - ],0), - ) - def testLookAhead2( self ): - """Test LookAhead""" - self.doBasicTest( - ( - ( "whatever", AllIn + LookAhead, "ab", 0 ), - ( "whatever2", AllIn, "ab", 0 ), - ), - "abbaabccd", - ( 1,[ - ("whatever",0,6,None), - ("whatever2",0,6,None), - ],6), - ) + """Test Flags for returning/calling different functions on success""" + def doBasicTest(self, table, testvalue, expected, startPosition=0 ): + result = tag( testvalue, table , startPosition) + assert result == expected, '''\n\texpected:%s\n\tgot:%s\n'''%( expected, result ) + ### Return-type handling tests... 
+ def testCallTag1( self ): + """Test CallTag""" + def function (parentList, text, l,r,children): + parentList.append( (text[l:r], children) ) + self.doBasicTest( + ( + ( function, AllIn + CallTag, "ab", 0 ), + ), + "abbaabccd", + ( 1,[ + ("abbaab",None), + ],6), + ) + def testCallTag2( self ): + """Test CallTag with a class instance""" + class A: + def __call__(self, parentList, text, l,r,children): + parentList.append( (text[l:r], children) ) + self.doBasicTest( + ( + ( A(), AllIn + CallTag, "ab", 0 ), + ), + "abbaabccd", + ( 1,[ + ("abbaab",None), + ],6), + ) + def testAppendMatch1( self ): + """Test AppendMatch""" + def function (parentList, text, l,r,children): + parentList.append( (text[l:r], children) ) + self.doBasicTest( + ( + ( function, AllIn + AppendMatch, "ab", 0 ), + ), + "abbaabccd", + ( 1,[ + "abbaab", + ],6), + ) + def testAppendToTagobj1( self ): + """Test AppendToTagobj""" + class X: + successful = "" + def append(self, value): + self.successful = value + tag = X() + self.doBasicTest( + ( + ( tag, AllIn + AppendToTagobj, "ab", 0 ), + ), + "abbaabccd", + ( 1,[ + ],6), + ) + assert tag.successful == (None,0,6,None), "TagObject's append was called with %s"%(repr(tag.successful),) + def testAppendToTagobj2( self ): + """Test AppendToTagobj with a simple list""" + + tag = [] + self.doBasicTest( + ( + ( tag, AllIn + AppendToTagobj, "ab", 0 ), + ), + "abbaabccd", + ( 1,[ + ],6), + ) + assert tag[0] == (None,0,6,None), "TagObject's append was called with %s"%(repr(tag.successful),) + + def testAppendTagobj1( self ): + """Test AppendTagobj""" + self.doBasicTest( + ( + ( "Hi there world!", AllIn + AppendTagobj, "ab", 0 ), + ), + "abbaabccd", + ( 1,[ + "Hi there world!", + ],6), + ) + if mxVersion >= ('2','1'): + def testLookAhead1( self ): + """Test LookAhead""" + self.doBasicTest( + ( + ( "whatever", AllIn + LookAhead, "ab", 0 ), + ), + "abbaabccd", + ( 1,[ + ("whatever",0,6,None), + ],0), + ) + def testLookAhead2( self ): + """Test LookAhead""" + self.doBasicTest( + ( + ( "whatever", AllIn + LookAhead, "ab", 0 ), + ( "whatever2", AllIn, "ab", 0 ), + ), + "abbaabccd", + ( 1,[ + ("whatever",0,6,None), + ("whatever2",0,6,None), + ],6), + ) def getSuite(): - return unittest.makeSuite(MXFlagTests,'test') + return unittest.makeSuite(MXFlagTests,'test') if __name__ == "__main__": - unittest.main(defaultTest="getSuite") + unittest.main(defaultTest="getSuite") diff -Nru simpleparse-2.1.0a1/tests/mx_high.py simpleparse-2.2.0/tests/mx_high.py --- simpleparse-2.1.0a1/tests/mx_high.py 2006-02-18 23:13:46.000000000 +0000 +++ simpleparse-2.2.0/tests/mx_high.py 2015-11-11 18:42:23.000000000 +0000 @@ -1,192 +1,191 @@ """Low-level matching tests for mx.TextTools""" -import unittest, pprint +import unittest from simpleparse.stt.TextTools import * -import string from simpleparse.stt import TextTools -mxVersion = tuple(string.split( TextTools.__version__, '.')[:3]) +mxVersion = tuple(TextTools.__version__.split('.')[:3]) class MXHighTests(unittest.TestCase): - def doBasicTest(self, table, testvalue, expected, startPosition=0 ): - result = tag( testvalue, table , startPosition) - assert result == expected, '''\n\texpected:%s\n\tgot:%s\n'''%( expected, result ) - - ### XXX Need to figure out what the heck loop is for and how to test it - - - def testCall( self ): - """Test call-to-match Call command""" - def function( text, start, end ): - return end - - self.doBasicTest( - ( - ( "ab", Call, function, 0 ), - ), - "cdffgg", - ( 1,[ - ("ab",0,6,None), - ],6), - ) - def testCall2( self ): - """Test 
call-to-match Call command with object instance""" - class X: - def __call__( self, text, start, end ): - return end - - self.doBasicTest( - ( - ( "ab", Call, X(), 0 ), - ), - "cdffgg", - ( 1,[ - ("ab",0,6,None), - ],6), - ) - - def testCallArg( self ): - """Test call-to-match CallArg command""" - def function( text, start, end, *arguments ): - assert arguments == (1,2,3), """Passed arguments were not what we passed in""" - return end - - self.doBasicTest( - ( - ( "ab", CallArg, (function,1,2,3), 0 ), - ), - "cdffgg", - ( 1,[ - ("ab",0,6,None), - ],6), - ) - - if mxVersion >= ('2','1'): - def testsWordStart1( self ): - """Test simple sWordStart command""" - for algo in [BOYERMOORE, TRIVIAL]: - self.doBasicTest( - ( - ( "ab", sWordStart, TextSearch("ab", algorithm=algo), 0 ), - ), - "ddeeffab", - ( 1,[("ab",0,6,None)],6), - ) - def testsWordStart2( self ): - """Test simple sWordStart command ignore fail""" - for algo in [BOYERMOORE, TRIVIAL]: - self.doBasicTest( - ( - ( "ab", sWordStart, TextSearch("ab", algorithm=algo), 1,1), - ), - "cdffgg", - ( 1,[],0), - ) - - def testsWordEnd1( self ): - """Test simple sWordEnd command""" - for algo in [BOYERMOORE, TRIVIAL]: - self.doBasicTest( - ( - ( "ab", sWordEnd, TextSearch("ab", algorithm=algo), 0 ), - ), - "ddeeffab", - ( 1,[("ab",0,8,None)],8), - ) - def testsWordEnd2( self ): - """Test simple sWordEnd command ignore fail""" - for algo in [BOYERMOORE, TRIVIAL]: - self.doBasicTest( - ( - ( "ab", sWordEnd, TextSearch("ab", algorithm=algo), 1,1), - ), - "cdffgg", - ( 1,[],0), - ) - - - def testsFindWord1( self ): - """Test simple sWordFind command""" - for algo in [BOYERMOORE, TRIVIAL]: - self.doBasicTest( - ( - ( "ab", sFindWord, TextSearch("ab", algorithm=algo), 0 ), - ), - "ddeeffab", - ( 1,[("ab",6,8,None)],8), - ) - def testsFindWord2( self ): - """Test simple sFindWord command ignore fail""" - for algo in [BOYERMOORE, TRIVIAL]: - self.doBasicTest( - ( - ( "ab", sFindWord, TextSearch("ab", algorithm=algo), 1,1), - ), - "cdffgg", - ( 1,[],0), - ) - else: - def testsWordStart1( self ): - """Test simple sWordStart command""" - self.doBasicTest( - ( - ( "ab", sWordStart, BMS("ab"), 0 ), - ), - "ddeeffab", - ( 1,[("ab",0,6,None)],6), - ) - def testsWordStart2( self ): - """Test simple sWordStart command ignore fail""" - self.doBasicTest( - ( - ( "ab", sWordStart, BMS("ab"), 1,1), - ), - "cdffgg", - ( 1,[],0), - ) - - def testsWordEnd1( self ): - """Test simple sWordEnd command""" - self.doBasicTest( - ( - ( "ab", sWordEnd, BMS("ab"), 0 ), - ), - "ddeeffab", - ( 1,[("ab",0,8,None)],8), - ) - def testsWordEnd2( self ): - """Test simple sWordEnd command ignore fail""" - self.doBasicTest( - ( - ( "ab", sWordEnd, BMS("ab"), 1,1), - ), - "cdffgg", - ( 1,[],0), - ) - - - def testsFindWord1( self ): - """Test simple sWordFind command""" - self.doBasicTest( - ( - ( "ab", sFindWord, BMS("ab"), 0 ), - ), - "ddeeffab", - ( 1,[("ab",6,8,None)],8), - ) - def testsFindWord2( self ): - """Test simple sFindWord command ignore fail""" - self.doBasicTest( - ( - ( "ab", sFindWord, BMS("ab"), 1,1), - ), - "cdffgg", - ( 1,[],0), - ) - + def doBasicTest(self, table, testvalue, expected, startPosition=0 ): + result = tag( testvalue, table , startPosition) + assert result == expected, '''\n\texpected:%s\n\tgot:%s\n'''%( expected, result ) + + ### XXX Need to figure out what the heck loop is for and how to test it + + + def testCall( self ): + """Test call-to-match Call command""" + def function( text, start, end ): + return end + + self.doBasicTest( + ( + ( "ab", Call, 
function, 0 ), + ), + "cdffgg", + ( 1,[ + ("ab",0,6,None), + ],6), + ) + def testCall2( self ): + """Test call-to-match Call command with object instance""" + class X: + def __call__( self, text, start, end ): + return end + + self.doBasicTest( + ( + ( "ab", Call, X(), 0 ), + ), + "cdffgg", + ( 1,[ + ("ab",0,6,None), + ],6), + ) + + def testCallArg( self ): + """Test call-to-match CallArg command""" + def function( text, start, end, *arguments ): + assert arguments == (1,2,3), """Passed arguments were not what we passed in""" + return end + + self.doBasicTest( + ( + ( "ab", CallArg, (function,1,2,3), 0 ), + ), + "cdffgg", + ( 1,[ + ("ab",0,6,None), + ],6), + ) + + if mxVersion >= ('2','1'): + def testsWordStart1( self ): + """Test simple sWordStart command""" + for algo in [BOYERMOORE, TRIVIAL]: + self.doBasicTest( + ( + ( b"ab", sWordStart, TextSearch(b"ab", algorithm=algo), 0 ), + ), + b"ddeeffab", + ( 1,[(b"ab",0,6,None)],6), + ) + def testsWordStart2( self ): + """Test simple sWordStart command ignore fail""" + for algo in [BOYERMOORE, TRIVIAL]: + self.doBasicTest( + ( + ( b"ab", sWordStart, TextSearch(b"ab", algorithm=algo), 1,1), + ), + b"cdffgg", + ( 1,[],0), + ) + + def testsWordEnd1( self ): + """Test simple sWordEnd command""" + for algo in [BOYERMOORE, TRIVIAL]: + self.doBasicTest( + ( + ( b"ab", sWordEnd, TextSearch(b"ab", algorithm=algo), 0 ), + ), + b"ddeeffab", + ( 1,[(b"ab",0,8,None)],8), + ) + def testsWordEnd2( self ): + """Test simple sWordEnd command ignore fail""" + for algo in [BOYERMOORE, TRIVIAL]: + self.doBasicTest( + ( + ( b"ab", sWordEnd, TextSearch(b"ab", algorithm=algo), 1,1), + ), + b"cdffgg", + ( 1,[],0), + ) + + + def testsFindWord1( self ): + """Test simple sWordFind command""" + for algo in [BOYERMOORE, TRIVIAL]: + self.doBasicTest( + ( + ( b"ab", sFindWord, TextSearch(b"ab", algorithm=algo), 0 ), + ), + b"ddeeffab", + ( 1,[(b"ab",6,8,None)],8), + ) + def testsFindWord2( self ): + """Test simple sFindWord command ignore fail""" + for algo in [BOYERMOORE, TRIVIAL]: + self.doBasicTest( + ( + ( b"ab", sFindWord, TextSearch(b"ab", algorithm=algo), 1,1), + ), + b"cdffgg", + ( 1,[],0), + ) + else: + def testsWordStart1( self ): + """Test simple sWordStart command""" + self.doBasicTest( + ( + ( b"ab", sWordStart, BMS("ab"), 0 ), + ), + b"ddeeffab", + ( 1,[(b"ab",0,6,None)],6), + ) + def testsWordStart2( self ): + """Test simple sWordStart command ignore fail""" + self.doBasicTest( + ( + ( b"ab", sWordStart, BMS("ab"), 1,1), + ), + b"cdffgg", + ( 1,[],0), + ) + + def testsWordEnd1( self ): + """Test simple sWordEnd command""" + self.doBasicTest( + ( + ( b"ab", sWordEnd, BMS(b"ab"), 0 ), + ), + b"ddeeffab", + ( 1,[(b"ab",0,8,None)],8), + ) + def testsWordEnd2( self ): + """Test simple sWordEnd command ignore fail""" + self.doBasicTest( + ( + ( b"ab", sWordEnd, BMS(b"ab"), 1,1), + ), + b"cdffgg", + ( 1,[],0), + ) + + + def testsFindWord1( self ): + """Test simple sWordFind command""" + self.doBasicTest( + ( + ( "ab", sFindWord, BMS("ab"), 0 ), + ), + "ddeeffab", + ( 1,[("ab",6,8,None)],8), + ) + def testsFindWord2( self ): + """Test simple sFindWord command ignore fail""" + self.doBasicTest( + ( + ( "ab", sFindWord, BMS("ab"), 1,1), + ), + "cdffgg", + ( 1,[],0), + ) + - + def getSuite(): - return unittest.makeSuite(MXHighTests,'test') + return unittest.makeSuite(MXHighTests,'test') if __name__ == "__main__": - unittest.main(defaultTest="getSuite") + unittest.main(defaultTest="getSuite") diff -Nru simpleparse-2.1.0a1/tests/mx_low.py simpleparse-2.2.0/tests/mx_low.py 
--- simpleparse-2.1.0a1/tests/mx_low.py 2006-02-18 23:14:02.000000000 +0000 +++ simpleparse-2.2.0/tests/mx_low.py 2015-11-11 19:01:43.000000000 +0000 @@ -1,302 +1,301 @@ """Low-level matching tests for mx.TextTools""" -import unittest, pprint +import unittest from simpleparse.stt.TextTools import * -import string from simpleparse.stt import TextTools -mxVersion = tuple(string.split( TextTools.__version__, '.')[:3]) -from genericvalues import AnyInt, NullResult +mxVersion = tuple(TextTools.__version__.split('.')[:3]) +from .genericvalues import AnyInt class MXLowTests(unittest.TestCase): - def doBasicTest(self, table, testvalue, expected, startPosition=0 ): - result = tag( testvalue, table , startPosition) - assert result == expected, '''\n\texpected:%s\n\tgot:%s\n'''%( expected, result ) - def testAllIn1( self ): - """Test simple AllIn command""" - self.doBasicTest( - ( - ( "ab", AllIn, "ab", 0 ), - ), - "abbaab", - ( 1,[("ab",0,6,None)],6), - ) - def testAllIn2( self ): - """Test simple AllIn command ignore fail""" - self.doBasicTest( - ( - ( "ab", AllIn, "ab", 1,1 ), - ), - "c", - ( 1,[],0), - ) - def testAllIn3( self ): - """Test simple AllIn command w 2 items""" - self.doBasicTest( - ( - ( "ab", AllIn, "ab", 1,1 ), - ( "c", AllIn, "cde", 0 ), - ), - "abbaabccdd", - ( 1,[ - ("ab",0,6,None), - ("c",6,10,None), - - ],10), - ) - def testAllIn4( self ): - """Test simple AllIn command fail on second - - This should truncate the results list back to [], as well - as returning 0 as length. This is broken under - mx.TextTools 2.1.0b1! - """ - self.doBasicTest( - ( - ( "ab", AllIn, "ab", 1,1 ), - ( "c", AllIn, "cde", 0 ), - ), - "abbaab", - ( 0,[ - ],AnyInt), - ) - def testAllIn5( self ): - """Test simple AllIn command with None tagobj""" - self.doBasicTest( - ( - ( None, AllIn, "ab", 0 ), - ), - "abbaab", - ( 1,[],6), - ) - def testAllNotIn1( self ): - """Test simple AllNotIn command""" - self.doBasicTest( - ( - ( "ab", AllNotIn, "ab", 0 ), - ), - "ccddee", - ( 1,[("ab",0,6,None)],6), - ) - def testAllNotIn2( self ): - """Test simple AllNotIn command ignore fail""" - self.doBasicTest( - ( - ( "ab", AllNotIn, "ab", 1,1 ), - ), - "a", - ( 1,[],0), - ) - def testAllNotIn3( self ): - """Test simple AllNotIn command w 2 items""" - self.doBasicTest( - ( - ( "ab", AllNotIn, "ab", 1,1 ), - ( "c", AllNotIn, "cde", 0 ), - ), - "ccddabbaab", - ( 1,[ - ("ab",0,4,None), - ("c",4,10,None), - - ],10), - ) - - - def testIs1( self ): - """Test simple Is command""" - self.doBasicTest( - ( - ( "ab", Is, "a", 0 ), - ), - "abbaab", - ( 1,[("ab",0,1,None)],1), - ) - def testIs2( self ): - """Test simple Is command ignore fail""" - self.doBasicTest( - ( - ( "ab", Is, "a", 1,1), - ), - "c", - ( 1,[],0), - ) - - def testIsIn1( self ): - """Test simple IsIn command""" - self.doBasicTest( - ( - ( "ab", IsIn, "ab", 0 ), - ), - "abbaab", - ( 1,[("ab",0,1,None)],1), - ) - def testIsIn2( self ): - """Test simple IsIn command ignore fail""" - self.doBasicTest( - ( - ( "ab", IsIn, "ab", 1,1), - ), - "c", - ( 1,[],0), - ) - - def testIsNotIn1( self ): - """Test simple IsNotIn command""" - self.doBasicTest( - ( - ( "ab", IsNotIn, "ab", 0 ), - ), - "ccddee", - ( 1,[("ab",0,1,None)],1), - ) - def testIsNotIn2( self ): - """Test simple IsNotIn command ignore fail""" - self.doBasicTest( - ( - ( "ab", IsNotIn, "ab", 1,1), - ), - "abb", - ( 1,[],0), - ) - - - def testWord1( self ): - """Test simple Word command""" - self.doBasicTest( - ( - ( "ab", Word, "ab", 0 ), - ), - "ab", - ( 1,[("ab",0,2,None)],2), - ) - def testWord2( self ): - 
"""Test simple Word command ignore fail""" - self.doBasicTest( - ( - ( "ab", Word, "ab", 1,1), - ), - "cd", - ( 1,[],0), - ) - def testWordStart1( self ): - """Test simple WordStart command""" - self.doBasicTest( - ( - ( "ab", WordStart, "ab", 0 ), - ), - "ddeeffab", - ( 1,[("ab",0,6,None)],6), - ) - def testWordStart2( self ): - """Test simple WordStart command ignore fail""" - self.doBasicTest( - ( - ( "ab", WordStart, "ab", 1,1), - ), - "cdffgg", - ( 1,[],0), - ) - - def testWordEnd1( self ): - """Test simple WordEnd command""" - self.doBasicTest( - ( - ( "ab", WordEnd, "ab", 0 ), - ), - "ddeeffab", - ( 1,[("ab",0,8,None)],8), - ) - def testWordEnd2( self ): - """Test simple WordEnd command ignore fail""" - self.doBasicTest( - ( - ( "ab", WordEnd, "ab", 1,1), - ), - "cdffgg", - ( 1,[],0), - ) - - def testAllInSet1( self ): - """Test simple AllInSet command""" - self.doBasicTest( - ( - ( "ab", AllInSet, set("ab"), 0 ), - ), - "abbaab", - ( 1,[("ab",0,6,None)],6), - ) - def testAllInSet2( self ): - """Test simple AllInSet command ignore fail""" - self.doBasicTest( - ( - ( "ab", AllInSet, set("ab"), 1,1 ), - ), - "c", - ( 1,[],0), - ) - - def testIsInSet1( self ): - """Test simple IsInSet command""" - self.doBasicTest( - ( - ( "ab", IsInSet, set("ab"), 0 ), - ), - "abbaab", - ( 1,[("ab",0,1,None)],1), - ) - def testIsInSet2( self ): - """Test simple IsInSet command ignore fail""" - self.doBasicTest( - ( - ( "ab", IsInSet, set("ab"), 1,1), - ), - "c", - ( 1,[],0), - ) - if mxVersion >= ('2','1'): - def testIsInCharSet1( self ): - """Test simple IsInCharSet command""" - self.doBasicTest( - ( - ( "ab", IsInCharSet, CharSet("ab"), 0 ), - ), - "abbaab", - ( 1,[("ab",0,1,None)],1), - ) - def testIsInCharSet2( self ): - """Test simple IsInCharSet command ignore fail""" - self.doBasicTest( - ( - ( "ab", IsInCharSet, CharSet("ab"), 1,1), - ), - "c", - ( 1,[],0), - ) - - def testAllInCharSet1( self ): - """Test simple AllInSet command w/ CharSet object""" - self.doBasicTest( - ( - ( "ab", AllInCharSet, CharSet("ab"), 0 ), - ), - "abbaab", - ( 1,[("ab",0,6,None)],6), - ) - def testAllInCharSet2( self ): - """Test simple AllInSet command ignore fail""" - self.doBasicTest( - ( - ( "ab", AllInCharSet, CharSet("ab"), 1,1), - ), - "ccd", - ( 1,[],0), - ) - + def doBasicTest(self, table, testvalue, expected, startPosition=0 ): + result = tag( testvalue, table , startPosition) + assert result == expected, '''\n\texpected:%s\n\tgot:%s\n'''%( expected, result ) + def testAllIn1( self ): + """Test simple AllIn command""" + self.doBasicTest( + ( + ( "ab", AllIn, "ab", 0 ), + ), + "abbaab", + ( 1,[("ab",0,6,None)],6), + ) + def testAllIn2( self ): + """Test simple AllIn command ignore fail""" + self.doBasicTest( + ( + ( "ab", AllIn, "ab", 1,1 ), + ), + "c", + ( 1,[],0), + ) + def testAllIn3( self ): + """Test simple AllIn command w 2 items""" + self.doBasicTest( + ( + ( "ab", AllIn, "ab", 1,1 ), + ( "c", AllIn, "cde", 0 ), + ), + "abbaabccdd", + ( 1,[ + ("ab",0,6,None), + ("c",6,10,None), + + ],10), + ) + def testAllIn4( self ): + """Test simple AllIn command fail on second + + This should truncate the results list back to [], as well + as returning 0 as length. This is broken under + mx.TextTools 2.1.0b1! 
+ """ + self.doBasicTest( + ( + ( "ab", AllIn, "ab", 1,1 ), + ( "c", AllIn, "cde", 0 ), + ), + "abbaab", + ( 0,[ + ],AnyInt), + ) + def testAllIn5( self ): + """Test simple AllIn command with None tagobj""" + self.doBasicTest( + ( + ( None, AllIn, "ab", 0 ), + ), + "abbaab", + ( 1,[],6), + ) + def testAllNotIn1( self ): + """Test simple AllNotIn command""" + self.doBasicTest( + ( + ( "ab", AllNotIn, "ab", 0 ), + ), + "ccddee", + ( 1,[("ab",0,6,None)],6), + ) + def testAllNotIn2( self ): + """Test simple AllNotIn command ignore fail""" + self.doBasicTest( + ( + ( "ab", AllNotIn, "ab", 1,1 ), + ), + "a", + ( 1,[],0), + ) + def testAllNotIn3( self ): + """Test simple AllNotIn command w 2 items""" + self.doBasicTest( + ( + ( "ab", AllNotIn, "ab", 1,1 ), + ( "c", AllNotIn, "cde", 0 ), + ), + "ccddabbaab", + ( 1,[ + ("ab",0,4,None), + ("c",4,10,None), + + ],10), + ) + + + def testIs1( self ): + """Test simple Is command""" + self.doBasicTest( + ( + ( "ab", Is, "a", 0 ), + ), + "abbaab", + ( 1,[("ab",0,1,None)],1), + ) + def testIs2( self ): + """Test simple Is command ignore fail""" + self.doBasicTest( + ( + ( "ab", Is, "a", 1,1), + ), + "c", + ( 1,[],0), + ) + + def testIsIn1( self ): + """Test simple IsIn command""" + self.doBasicTest( + ( + ( "ab", IsIn, "ab", 0 ), + ), + "abbaab", + ( 1,[("ab",0,1,None)],1), + ) + def testIsIn2( self ): + """Test simple IsIn command ignore fail""" + self.doBasicTest( + ( + ( "ab", IsIn, "ab", 1,1), + ), + "c", + ( 1,[],0), + ) + + def testIsNotIn1( self ): + """Test simple IsNotIn command""" + self.doBasicTest( + ( + ( "ab", IsNotIn, "ab", 0 ), + ), + "ccddee", + ( 1,[("ab",0,1,None)],1), + ) + def testIsNotIn2( self ): + """Test simple IsNotIn command ignore fail""" + self.doBasicTest( + ( + ( "ab", IsNotIn, "ab", 1,1), + ), + "abb", + ( 1,[],0), + ) + + + def testWord1( self ): + """Test simple Word command""" + self.doBasicTest( + ( + ( "ab", Word, "ab", 0 ), + ), + "ab", + ( 1,[("ab",0,2,None)],2), + ) + def testWord2( self ): + """Test simple Word command ignore fail""" + self.doBasicTest( + ( + ( "ab", Word, "ab", 1,1), + ), + "cd", + ( 1,[],0), + ) + def testWordStart1( self ): + """Test simple WordStart command""" + self.doBasicTest( + ( + ( "ab", WordStart, "ab", 0 ), + ), + "ddeeffab", + ( 1,[("ab",0,6,None)],6), + ) + def testWordStart2( self ): + """Test simple WordStart command ignore fail""" + self.doBasicTest( + ( + ( "ab", WordStart, "ab", 1,1), + ), + "cdffgg", + ( 1,[],0), + ) + + def testWordEnd1( self ): + """Test simple WordEnd command""" + self.doBasicTest( + ( + ( "ab", WordEnd, "ab", 0 ), + ), + "ddeeffab", + ( 1,[("ab",0,8,None)],8), + ) + def testWordEnd2( self ): + """Test simple WordEnd command ignore fail""" + self.doBasicTest( + ( + ( "ab", WordEnd, "ab", 1,1), + ), + "cdffgg", + ( 1,[],0), + ) + + def testAllInSet1( self ): + """Test simple AllInSet command""" + self.doBasicTest( + ( + ( b"ab", AllInSet, set(b"ab"), 0 ), + ), + b"abbaab", + ( 1,[(b"ab",0,6,None)],6), + ) + def testAllInSet2( self ): + """Test simple AllInSet command ignore fail""" + self.doBasicTest( + ( + ( b"ab", AllInSet, set(b"ab"), 1,1 ), + ), + b"c", + ( 1,[],0), + ) + + def testIsInSet1( self ): + """Test simple IsInSet command""" + self.doBasicTest( + ( + ( b"ab", IsInSet, set(b"ab"), 0 ), + ), + b"abbaab", + ( 1,[(b"ab",0,1,None)],1), + ) + def testIsInSet2( self ): + """Test simple IsInSet command ignore fail""" + self.doBasicTest( + ( + ( b"ab", IsInSet, set(b"ab"), 1,1), + ), + b"c", + ( 1,[],0), + ) + if mxVersion >= ('2','1'): + def 
testIsInCharSet1( self ): + """Test simple IsInCharSet command""" + self.doBasicTest( + ( + ( b"ab", IsInCharSet, CharSet(b"ab"), 0 ), + ), + b"abbaab", + ( 1,[(b"ab",0,1,None)],1), + ) + def testIsInCharSet2( self ): + """Test simple IsInCharSet command ignore fail""" + self.doBasicTest( + ( + ( "ab", IsInCharSet, CharSet("ab"), 1,1), + ), + "c", + ( 1,[],0), + ) + + def testAllInCharSet1( self ): + """Test simple AllInSet command w/ CharSet object""" + self.doBasicTest( + ( + ( "ab", AllInCharSet, CharSet("ab"), 0 ), + ), + "abbaab", + ( 1,[("ab",0,6,None)],6), + ) + def testAllInCharSet2( self ): + """Test simple AllInSet command ignore fail""" + self.doBasicTest( + ( + ( "ab", AllInCharSet, CharSet("ab"), 1,1), + ), + "ccd", + ( 1,[],0), + ) + - + def getSuite(): - return unittest.makeSuite(MXLowTests,'test') + return unittest.makeSuite(MXLowTests,'test') if __name__ == "__main__": - unittest.main(defaultTest="getSuite") + unittest.main(defaultTest="getSuite") diff -Nru simpleparse-2.1.0a1/tests/mx_recursive.py simpleparse-2.2.0/tests/mx_recursive.py --- simpleparse-2.1.0a1/tests/mx_recursive.py 2006-02-18 23:14:19.000000000 +0000 +++ simpleparse-2.2.0/tests/mx_recursive.py 2015-11-11 18:42:23.000000000 +0000 @@ -3,160 +3,160 @@ from simpleparse.stt.TextTools import * ab = ( - ( "ab", Word, "ab", 0 ), + ( "ab", Word, "ab", 0 ), ) cdef = ( - ( "cd", Word, "cd", 0 ), - ( "ef", Word, "ef", 1,1 ), + ( "cd", Word, "cd", 0 ), + ( "ef", Word, "ef", 1,1 ), ) tableList = [ ab, cdef ] - + class MXRecursiveTests(unittest.TestCase): - def doBasicTest(self, table, testvalue, expected, startPosition=0 ): - result = tag( testvalue, table , startPosition) - assert result == expected, '''\n\texpected:%s\n\tgot:%s\n'''%( expected, result ) - - def testAB( self ): - """Test AB testing command""" - self.doBasicTest( - ab, - "abcdef", - ( 1,[ - ("ab",0,2,None), - ],2), - ) - def testCDEF( self ): - """Test CDEF testing command""" - self.doBasicTest( - cdef, - "cdef", - ( 1,[ - ("cd",0,2,None), - ("ef",2,4,None), - ],4), - ) - def testABCDEF( self ): - """Test abcdef all together""" - self.doBasicTest( - ab+cdef, - "abcdef", - ( 1,[ - ("ab",0,2,None), - ("cd",2,4,None), - ("ef",4,6,None), - ],6), - ) - - def testTable1( self ): - """Test Table command""" - self.doBasicTest( - ( - ("first", Table, ab), - ("second", Table, cdef), - ), - "abcdef", - ( 1,[ - ("first",0,2,[ - ("ab",0,2,None), - ]), - ("second",2,6,[ - ("cd",2,4,None), - ("ef",4,6,None), - ]), - ],6), - ) - def testTableInList1( self ): - """Test TableInList command""" - self.doBasicTest( - ( - ("first", TableInList, (tableList,0)), - ("second", TableInList,(tableList,1)), - ), - "abcdef", - ( 1,[ - ("first",0,2,[ - ("ab",0,2,None), - ]), - ("second",2,6,[ - ("cd",2,4,None), - ("ef",4,6,None), - ]), - ],6), - ) - - def testSubTable1( self ): - """Test SubTable command""" - self.doBasicTest( - ( - ("first", SubTable, ab), - ("second", SubTable, cdef), - ), - "abcdef", - ( 1,[ - ("ab",0,2,None), - ("first", 0,2, None), - ("cd",2,4,None), - ("ef",4,6,None), - ("second", 2,6, None), - ],6), - ) - def testSubTable2( self ): - """Test SubTable command with no reporting of st groups""" - self.doBasicTest( - ( - (None, SubTable, ab), - (None, SubTable, cdef), - ), - "abcdef", - ( 1,[ - ("ab",0,2,None), - ("cd",2,4,None), - ("ef",4,6,None), - ],6), - ) - def testSubTableInList1( self ): - """Test SubTableInList command""" - self.doBasicTest( - ( - ("first", SubTableInList, (tableList,0)), - ("second", SubTableInList, (tableList,1)), - ), - "abcdef", - ( 
1,[ - ("ab",0,2,None), - ("first", 0,2, None), - ("cd",2,4,None), - ("ef",4,6,None), - ("second", 2,6, None), - ],6), - ) - def testSubTableNotReturnRecursive( self ): - """Test that SubTable calls don't return a recursive structure""" - result = tag( "abcdef", ( - ("first", SubTableInList, (tableList,0)), - ("second", SubTableInList, (tableList,1)), - ), 0) - assert result [1] is not result[1][1][3], """Subtable results list was the same list as the list enclosing it, looped data structure created""" - - def testSubTableInList2( self ): - """Test SubTable command with no reporting of st groups""" - self.doBasicTest( - ( - (None, SubTableInList, (tableList,0)), - (None, SubTableInList, (tableList,1)), - ), - "abcdef", - ( 1,[ - ("ab",0,2,None), - ("cd",2,4,None), - ("ef",4,6,None), - ],6), - ) + def doBasicTest(self, table, testvalue, expected, startPosition=0 ): + result = tag( testvalue, table , startPosition) + assert result == expected, '''\n\texpected:%s\n\tgot:%s\n'''%( expected, result ) + + def testAB( self ): + """Test AB testing command""" + self.doBasicTest( + ab, + "abcdef", + ( 1,[ + ("ab",0,2,None), + ],2), + ) + def testCDEF( self ): + """Test CDEF testing command""" + self.doBasicTest( + cdef, + "cdef", + ( 1,[ + ("cd",0,2,None), + ("ef",2,4,None), + ],4), + ) + def testABCDEF( self ): + """Test abcdef all together""" + self.doBasicTest( + ab+cdef, + "abcdef", + ( 1,[ + ("ab",0,2,None), + ("cd",2,4,None), + ("ef",4,6,None), + ],6), + ) + + def testTable1( self ): + """Test Table command""" + self.doBasicTest( + ( + ("first", Table, ab), + ("second", Table, cdef), + ), + "abcdef", + ( 1,[ + ("first",0,2,[ + ("ab",0,2,None), + ]), + ("second",2,6,[ + ("cd",2,4,None), + ("ef",4,6,None), + ]), + ],6), + ) + def testTableInList1( self ): + """Test TableInList command""" + self.doBasicTest( + ( + ("first", TableInList, (tableList,0)), + ("second", TableInList,(tableList,1)), + ), + "abcdef", + ( 1,[ + ("first",0,2,[ + ("ab",0,2,None), + ]), + ("second",2,6,[ + ("cd",2,4,None), + ("ef",4,6,None), + ]), + ],6), + ) + + def testSubTable1( self ): + """Test SubTable command""" + self.doBasicTest( + ( + ("first", SubTable, ab), + ("second", SubTable, cdef), + ), + "abcdef", + ( 1,[ + ("ab",0,2,None), + ("first", 0,2, None), + ("cd",2,4,None), + ("ef",4,6,None), + ("second", 2,6, None), + ],6), + ) + def testSubTable2( self ): + """Test SubTable command with no reporting of st groups""" + self.doBasicTest( + ( + (None, SubTable, ab), + (None, SubTable, cdef), + ), + "abcdef", + ( 1,[ + ("ab",0,2,None), + ("cd",2,4,None), + ("ef",4,6,None), + ],6), + ) + def testSubTableInList1( self ): + """Test SubTableInList command""" + self.doBasicTest( + ( + ("first", SubTableInList, (tableList,0)), + ("second", SubTableInList, (tableList,1)), + ), + "abcdef", + ( 1,[ + ("ab",0,2,None), + ("first", 0,2, None), + ("cd",2,4,None), + ("ef",4,6,None), + ("second", 2,6, None), + ],6), + ) + def testSubTableNotReturnRecursive( self ): + """Test that SubTable calls don't return a recursive structure""" + result = tag( "abcdef", ( + ("first", SubTableInList, (tableList,0)), + ("second", SubTableInList, (tableList,1)), + ), 0) + assert result [1] is not result[1][1][3], """Subtable results list was the same list as the list enclosing it, looped data structure created""" + + def testSubTableInList2( self ): + """Test SubTable command with no reporting of st groups""" + self.doBasicTest( + ( + (None, SubTableInList, (tableList,0)), + (None, SubTableInList, (tableList,1)), + ), + "abcdef", + ( 1,[ + 
("ab",0,2,None), + ("cd",2,4,None), + ("ef",4,6,None), + ],6), + ) - + def getSuite(): - return unittest.makeSuite(MXRecursiveTests,'test') + return unittest.makeSuite(MXRecursiveTests,'test') if __name__ == "__main__": - unittest.main(defaultTest="getSuite") + unittest.main(defaultTest="getSuite") \ No newline at end of file diff -Nru simpleparse-2.1.0a1/tests/mx_special.py simpleparse-2.2.0/tests/mx_special.py --- simpleparse-2.1.0a1/tests/mx_special.py 2006-02-18 23:14:28.000000000 +0000 +++ simpleparse-2.2.0/tests/mx_special.py 2015-11-11 19:01:43.000000000 +0000 @@ -1,145 +1,144 @@ """Low-level matching tests for mx.TextTools""" -import unittest, pprint +import unittest from simpleparse.stt.TextTools import * -import string from simpleparse.stt import TextTools -mxVersion = tuple(string.split( TextTools.__version__, '.')[:3]) -from genericvalues import AnyInt, NullResult +mxVersion = tuple(TextTools.__version__.split('.')[:3]) +from .genericvalues import AnyInt class MXSpecialTests(unittest.TestCase): - def doBasicTest(self, table, testvalue, expected, startPosition=0 ): - result = tag( testvalue, table , startPosition) - assert result == expected, '''\n\texpected:%s\n\tgot:%s\n'''%( expected, result ) - def testFail1( self ): - """Test Fail command""" - self.doBasicTest( - ( - ( "ab", Fail, None, 0 ), - ), - "abbaab", - ( 0,[ - ],AnyInt), - ) - def testFail2( self ): - """Test Fail command with ignore fail (Jump)""" - self.doBasicTest( - ( - ( "ab", Fail, None, 1), - ), - "abbaab", - ( 1,[ - ],0), - ) - - def testSkip1( self ): - """Test Skip command""" - self.doBasicTest( - ( - ( "ab", Skip, 1, 0 ), - ), - "abbaab", - ( 1,[ - ("ab",0,1,None), - ],1), - ) - def testSkip2( self ): - """Test Skip command with negative to before buffer - - Note: I don't like this, but it's what we should expect - from the system, so blah. Would be better IMO to have - success (within the buffer) and failure (outside the buffer) - but then we need a way to spell (jump, even outside buffer) - - Should have a test for what to do when we have AppendMatch - flag in this case... - """ - self.failUnlessRaises( TypeError, - self.doBasicTest, - ( - ( "ab", Skip, -1, 0 ), - ), - "abbaab", - ( 1,[ - ("ab",0,-1,None), - ],-1), - ) - - def testMove1( self ): - """Test Move command - XXX Should have tests for after buffer moves - """ - self.doBasicTest( - ( - ( "ab", Move, 4, 0 ), - ), - "abbaab", - ( 1,[ - ("ab",0,4,None), - ],4), - ) - def testMove2( self ): - """Test Move command with negative to middle of buffer - XXX should have tests for before buffer - - Note: this command is non-intuitive for Python users, - the negative slicing is 1 beyond what it would be for Python - (i.e. -1 in Python is 1 before the end, whereas in this - command it is the end) - """ - self.doBasicTest( - ( - ( "ab", Move, -4, 0 ), - ), - "abbaab", - ( 1,[ - ("ab",0,3,None), - ],3), - ) - def testMove3( self ): - """Test Move command - """ - self.doBasicTest( - ( - ( "ab", Move, 7, 0 ), - ), - "abbaab", - ( 1,[ - ("ab",0,7,None), - ],7), - ) - def testMove4( self ): - """Test Move to EOF - """ - self.doBasicTest( - ( - ( "ab", Move, ToEOF, 0), - ), - "abbaab", - ( 1,[ - ("ab",0,6,None), - ],6), - ) - - def testEOF1( self ): - """Test EOF command - - Although it's not documented, the original code returned - the EOF position as the left and right coords for the match, - so we mimic that behaviour now. 
- """ - self.doBasicTest( - ( - ( "ab", Move, 7, 1 ), - ( "c", EOF, Here, 0 ), - ), - "abbaab", - ( 1,[ - ("ab",0,7,None), - ("c",6,6,None), - ],6), - ) - + def doBasicTest(self, table, testvalue, expected, startPosition=0 ): + result = tag( testvalue, table , startPosition) + assert result == expected, '''\n\texpected:%s\n\tgot:%s\n'''%( expected, result ) + def testFail1( self ): + """Test Fail command""" + self.doBasicTest( + ( + ( "ab", Fail, None, 0 ), + ), + "abbaab", + ( 0,[ + ],AnyInt), + ) + def testFail2( self ): + """Test Fail command with ignore fail (Jump)""" + self.doBasicTest( + ( + ( "ab", Fail, None, 1), + ), + "abbaab", + ( 1,[ + ],0), + ) + + def testSkip1( self ): + """Test Skip command""" + self.doBasicTest( + ( + ( "ab", Skip, 1, 0 ), + ), + "abbaab", + ( 1,[ + ("ab",0,1,None), + ],1), + ) + def testSkip2( self ): + """Test Skip command with negative to before buffer + + Note: I don't like this, but it's what we should expect + from the system, so blah. Would be better IMO to have + success (within the buffer) and failure (outside the buffer) + but then we need a way to spell (jump, even outside buffer) + + Should have a test for what to do when we have AppendMatch + flag in this case... + """ + self.assertRaises( TypeError, + self.doBasicTest, + ( + ( "ab", Skip, -1, 0 ), + ), + "abbaab", + ( 1,[ + ("ab",0,-1,None), + ],-1), + ) + + def testMove1( self ): + """Test Move command + XXX Should have tests for after buffer moves + """ + self.doBasicTest( + ( + ( "ab", Move, 4, 0 ), + ), + "abbaab", + ( 1,[ + ("ab",0,4,None), + ],4), + ) + def testMove2( self ): + """Test Move command with negative to middle of buffer + XXX should have tests for before buffer + + Note: this command is non-intuitive for Python users, + the negative slicing is 1 beyond what it would be for Python + (i.e. -1 in Python is 1 before the end, whereas in this + command it is the end) + """ + self.doBasicTest( + ( + ( "ab", Move, -4, 0 ), + ), + "abbaab", + ( 1,[ + ("ab",0,3,None), + ],3), + ) + def testMove3( self ): + """Test Move command + """ + self.doBasicTest( + ( + ( "ab", Move, 7, 0 ), + ), + "abbaab", + ( 1,[ + ("ab",0,7,None), + ],7), + ) + def testMove4( self ): + """Test Move to EOF + """ + self.doBasicTest( + ( + ( "ab", Move, ToEOF, 0), + ), + "abbaab", + ( 1,[ + ("ab",0,6,None), + ],6), + ) + + def testEOF1( self ): + """Test EOF command + + Although it's not documented, the original code returned + the EOF position as the left and right coords for the match, + so we mimic that behaviour now. + """ + self.doBasicTest( + ( + ( "ab", Move, 7, 1 ), + ( "c", EOF, Here, 0 ), + ), + "abbaab", + ( 1,[ + ("ab",0,7,None), + ("c",6,6,None), + ],6), + ) + ## def testEOF2( self ): ## """Test EOF command when before buffer (can't test this any more, because of new sanity check raising error before we get to check)""" ## self.doBasicTest( @@ -151,84 +150,84 @@ ## ( 0,[ ## ],0), ## ) - def testEOF3( self ): - """Test EOF command when in middle of buffer""" - self.doBasicTest( - ( - ( "ab", Move, 3, 1 ), - ( "c", EOF, Here, 0 ), - ), - "abbaab", - ( 0,[ - ],AnyInt), - ) - def testJumpBeforeTable( self ): - """Test Jump to before table (explicit fail) - - Note: this reports the position attained by the - matching child (2) as the "error position", not - the position before that child (0). 
- """ - self.doBasicTest( - ( - ("ab",Word,"ab",1,-3), - ), - "abbaab", - ( 0,[ - ],AnyInt), - ) - ### tests for ObjectGenerator-idioms - def testNegativeOptString1( self ): - """Negative, optional string value with positive match (should return 0 as length of match)""" - self.doBasicTest( - ( - (None, WordEnd, 'test', 2, 1), - (None, Skip, -4, 2, 2), - (None, Skip, 1) - ), - "test", - (1,[ - ],0), - ) - def testBMSMove( self ): - """Negative, optional string value""" - self.doBasicTest( - ( - (None, sWordStart, BMS( "cd" ),1,2), - (None, Move, ToEOF ) - ), - "a", - (1,[ - ],1), - ) - - if mxVersion >= ('2','1'): - def testJumpTargetNamed( self ): - """Test JumpTarget command with tagobj specified""" - self.doBasicTest( - ( - ( "ab", JumpTarget, "SomeString" ), - ), - "abbaab", - ( 1,[ - ("ab",0,0,None), - ],0), - ) - def testJumpTarget( self ): - """Test JumpTarget command in normal usage""" - self.doBasicTest( - ( - "this", - ), - "abbaab", - ( 1,[ - ],0), - ) - + def testEOF3( self ): + """Test EOF command when in middle of buffer""" + self.doBasicTest( + ( + ( "ab", Move, 3, 1 ), + ( "c", EOF, Here, 0 ), + ), + "abbaab", + ( 0,[ + ],AnyInt), + ) + def testJumpBeforeTable( self ): + """Test Jump to before table (explicit fail) + + Note: this reports the position attained by the + matching child (2) as the "error position", not + the position before that child (0). + """ + self.doBasicTest( + ( + ("ab",Word,"ab",1,-3), + ), + "abbaab", + ( 0,[ + ],AnyInt), + ) + ### tests for ObjectGenerator-idioms + def testNegativeOptString1( self ): + """Negative, optional string value with positive match (should return 0 as length of match)""" + self.doBasicTest( + ( + (None, WordEnd, 'test', 2, 1), + (None, Skip, -4, 2, 2), + (None, Skip, 1) + ), + "test", + (1,[ + ],0), + ) + def testBMSMove( self ): + """Negative, optional string value""" + self.doBasicTest( + ( + (None, sWordStart, BMS( "cd" ),1,2), + (None, Move, ToEOF ) + ), + "a", + (1,[ + ],1), + ) + + if mxVersion >= ('2','1'): + def testJumpTargetNamed( self ): + """Test JumpTarget command with tagobj specified""" + self.doBasicTest( + ( + ( b"ab", JumpTarget, b"SomeString" ), + ), + b"abbaab", + ( 1,[ + (b"ab",0,0,None), + ],0), + ) + def testJumpTarget( self ): + """Test JumpTarget command in normal usage""" + self.doBasicTest( + ( + b"this", + ), + b"abbaab", + ( 1,[ + ],0), + ) + - + def getSuite(): - return unittest.makeSuite(MXSpecialTests,'test') + return unittest.makeSuite(MXSpecialTests,'test') if __name__ == "__main__": - unittest.main(defaultTest="getSuite") + unittest.main(defaultTest="getSuite") diff -Nru simpleparse-2.1.0a1/tests/mx_test.py simpleparse-2.2.0/tests/mx_test.py --- simpleparse-2.1.0a1/tests/mx_test.py 2002-07-09 06:25:14.000000000 +0000 +++ simpleparse-2.2.0/tests/mx_test.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,20 +0,0 @@ -import mx_low, mx_flag, mx_high, mx_special, mx_recursive -import unittest - - -def getSuite(): - set = [] - for module in [ - mx_low, - mx_flag, - mx_high, - mx_special, - mx_recursive - ]: - set.append( module.getSuite() ) - return unittest.TestSuite( - set - ) - -if __name__ == "__main__": - unittest.main(defaultTest="getSuite") diff -Nru simpleparse-2.1.0a1/tests/test_backup_on_subtable_failure.py simpleparse-2.2.0/tests/test_backup_on_subtable_failure.py --- simpleparse-2.1.0a1/tests/test_backup_on_subtable_failure.py 2006-02-18 23:14:55.000000000 +0000 +++ simpleparse-2.2.0/tests/test_backup_on_subtable_failure.py 2015-11-11 18:42:23.000000000 +0000 @@ -1,3 +1,5 @@ +from __future__ 
import print_function + declaration = r'''testparser := (a,b)* a := 'a' b := 'b' @@ -13,7 +15,6 @@ parser = Parser( declaration ).generator.buildParser('testparser' ) result = TextTools.tag( testdata, parser ) if result != expectedResult: - print 'backup-on-subtable-test failed' - print '\texpected', pprint.pprint( expectedResult ) - print '\tgot', pprint.pprint( result ) - + print('backup-on-subtable-test failed') + print('\texpected', pprint.pprint( expectedResult )) + print('\tgot', pprint.pprint( result )) diff -Nru simpleparse-2.1.0a1/tests/test_common_chartypes.py simpleparse-2.2.0/tests/test_common_chartypes.py --- simpleparse-2.1.0a1/tests/test_common_chartypes.py 2002-07-10 05:14:48.000000000 +0000 +++ simpleparse-2.2.0/tests/test_common_chartypes.py 2015-11-11 18:42:23.000000000 +0000 @@ -1,78 +1,91 @@ import unittest, string from simpleparse.parser import Parser from simpleparse.common import chartypes, timezone_names +assert chartypes from simpleparse import dispatchprocessor -fulltrans = string.maketrans("","") +try: + fulltrans = string.maketrans(b"",b"") + translate = string.translate +except AttributeError: + fulltrans = bytes.maketrans(b"",b"") + translate = bytes.translate class CommonTests(unittest.TestCase): - def doBasicTest(self, definition, parserName, testValue, expected, ): - result = Parser( definition).parse( testValue, parserName ) - assert result == expected, '''\nexpected:%s\n got:%s\n'''%( expected, result ) - def _testSet( self, set, singleName, multiName ): - """Test multi-line definitions""" - decl = """single := %s multiple := %s"""%( singleName, multiName ) - p = Parser(decl) - notset = string.translate( fulltrans, fulltrans, set ) - for char in set: - success, children, next = p.parse( char, singleName) - assert success and (next == 1), """Parser for %s couldn't parse %s"""%( singleName, char ) - for char in notset: - success, children, next = p.parse( char, singleName) - assert (not success) and (next == 0), """Parser for %s parsed %s"""%( singleName, char ) - success, children, next = p.parse( char, multiName) - assert (not success) and (next == 0), """Parser for %s parsed %s"""%( multiName, char ) - success, children, next = p.parse( set, multiName) - assert success and (next == len(set)), """Parser for %s couldn't parse full set of chars, failed at %s"""%( multiName, set[next:] ) - def testBasic( self ): - for set, single, multiple in ( - ("digits", "digit", "digits"), - ("uppercase", "uppercasechar", "uppercase"), - ("lowercase", "lowercasechar", "lowercase"), - ("letters", "letter", "letters"), - ("whitespace", "whitespacechar", "whitespace"), - ("octdigits", "octdigit", "octdigits"), - ("hexdigits", "hexdigit", "hexdigits"), - ("printable", "printablechar", "printable"), - ("punctuation", "punctuationchar", "punctuation"), + def doBasicTest(self, definition, parserName, testValue, expected, ): + result = Parser( definition).parse( testValue, parserName ) + assert result == expected, '''\nexpected:%s\n got:%s\n'''%( expected, result ) + def _testSet( self, set, singleName, multiName ): + """Test multi-line definitions""" + decl = """single := %s multiple := %s"""%( singleName, multiName ) + p = Parser(decl) + notset = translate( fulltrans, fulltrans, set ) + for char in set: + if isinstance(char,int): + char = chr(char) + success, children, next = p.parse( char, singleName) + assert success and (next == 1), """Parser for %s couldn't parse %s"""%( singleName, char ) + for char in notset: + if isinstance(char,int): + char = chr(char) + success, 
children, next = p.parse( char, singleName) + assert (not success) and (next == 0), """Parser for %s parsed %s"""%( singleName, char ) + success, children, next = p.parse( char, multiName) + assert (not success) and (next == 0), """Parser for %s parsed %s"""%( multiName, char ) + success, children, next = p.parse( set, multiName) + assert success and (next == len(set)), """Parser for %s couldn't parse full set of chars, failed at %s"""%( multiName, set[next:] ) + def testBasic( self ): + for set, single, multiple in ( + ("digits", "digit", "digits"), + ("ascii_uppercase", "uppercasechar", "uppercase"), + ("ascii_lowercase", "lowercasechar", "lowercase"), + ("ascii_letters", "letter", "letters"), + ("whitespace", "whitespacechar", "whitespace"), + ("octdigits", "octdigit", "octdigits"), + ("hexdigits", "hexdigit", "hexdigits"), + ("printable", "printablechar", "printable"), + ("punctuation", "punctuationchar", "punctuation"), - ("ascii_lowercase", "ascii_lowercasechar", "ascii_lowercase"), - ("ascii_uppercase", "ascii_uppercasechar", "ascii_uppercase"), - ): - try: - set = getattr( string, set) - self._testSet( - set, - single, - multiple, - ) - except AttributeError: - pass - def testEOF( self ): - p = Parser( """this := 'a',EOF""", 'this') - success, children, next = p.parse( 'a' ) - assert success, """EOF didn't match at end of string""" - def testEOFFail( self ): - p = Parser( """this := 'a',EOF""", 'this') - success, children, next = p.parse( 'a ' ) - assert not success, """EOF matched before end of string""" - - def testTZ( self ): - names = timezone_names.timezone_mapping.keys() - names.sort() # tests that the items don't match shorter versions... - decl = Parser("""this := (timezone_name, ' '?)+""", 'this') - proc = dispatchprocessor.DispatchProcessor() - proc.timezone_name = timezone_names.TimeZoneNameInterpreter() - text = string.join( names, ' ') - success, result, next = decl.parse( text, processor = proc ) - assert success, """Unable to complete parsing the timezone names, stopped parsing at char %s %s"""%(next, text[next:]) - assert result == map( timezone_names.timezone_mapping.get, names), """Got different results for interpretation than expected (expected first, recieved second)\n%s\n%s"""%(map( timezone_names.timezone_mapping.get, names), result) - - - - + ("ascii_lowercase", "ascii_lowercasechar", "ascii_lowercase"), + ("ascii_uppercase", "ascii_uppercasechar", "ascii_uppercase"), + ): + try: + set = getattr( string, set) + self._testSet( + set.encode('ascii'), + single, + multiple, + ) + except AttributeError: + raise + except TypeError as err: + err.args += (set,single,multiple) + raise + def testEOF( self ): + p = Parser( """this := 'a',EOF""", 'this') + success, children, next = p.parse( 'a' ) + assert success, """EOF didn't match at end of string""" + def testEOFFail( self ): + p = Parser( """this := 'a',EOF""", 'this') + success, children, next = p.parse( 'a ' ) + assert not success, """EOF matched before end of string""" + + def testTZ( self ): + names = list(timezone_names.timezone_mapping.keys()) + names.sort() # tests that the items don't match shorter versions... 
+ decl = Parser("""this := (timezone_name, ' '?)+""", 'this') + proc = dispatchprocessor.DispatchProcessor() + proc.timezone_name = timezone_names.TimeZoneNameInterpreter() + text = ' '.join(names) + success, result, next = decl.parse( text, processor = proc ) + assert success, """Unable to complete parsing the timezone names, stopped parsing at char %s %s"""%(next, text[next:]) + assert result == list(map( timezone_names.timezone_mapping.get, names)), """Got different results for interpretation than expected (expected first, recieved second)\n%s\n%s"""%(list(map( timezone_names.timezone_mapping.get, names)), result) + + + + def getSuite(): - return unittest.makeSuite(CommonTests, 'test') + return unittest.makeSuite(CommonTests, 'test') if __name__ == "__main__": - unittest.main(defaultTest="getSuite") + unittest.main(defaultTest="getSuite") diff -Nru simpleparse-2.1.0a1/tests/test_common_comments.py simpleparse-2.2.0/tests/test_common_comments.py --- simpleparse-2.1.0a1/tests/test_common_comments.py 2006-02-19 00:30:16.000000000 +0000 +++ simpleparse-2.2.0/tests/test_common_comments.py 2015-11-11 18:42:23.000000000 +0000 @@ -1,80 +1,80 @@ """Test the various common library comment productions""" -import unittest, string +import unittest from simpleparse.parser import Parser from simpleparse.common import comments from simpleparse import dispatchprocessor parseTests = [ - # each production should match the whole of all of the first, - # and not match any of the second... - ("c_comment", [ - """/* this */""", - """/* this \n\n*/""", - ],[ - """// this""", - """# this""", - """# this\n""", - """# this\r\n""", - ]), - ("c_nest_comment", [ - """/* this */""", - """/* this \n\n*/""", - """/* /* this */ */""", - """/* /* this \n*/ */""", - ],[ - """// this""", - """# this""", - """; this""", - ]), - ("hash_comment", [ - """# this""", - """# this\n""", - """# this\r\n""", - ],[ - """// this""", - """/* this */""", - """/* /* this */ */""", - ]), - ("semicolon_comment", [ - """; this""", - """; this\n""", - """; this\r\n""", - ],[ - """# this""", - """// this""", - """/* this */""", - """/* /* this */ */""", - ]), - ("slashslash_comment", [ - """// this""", - """// this\n""", - """// this\r\n""", - ],[ - """# this""", - """/ this""", - """/* this */""", - """/* /* this */ */""", - ]), + # each production should match the whole of all of the first, + # and not match any of the second... 
+ ("c_comment", [ + """/* this */""", + """/* this \n\n*/""", + ],[ + """// this""", + """# this""", + """# this\n""", + """# this\r\n""", + ]), + ("c_nest_comment", [ + """/* this */""", + """/* this \n\n*/""", + """/* /* this */ */""", + """/* /* this \n*/ */""", + ],[ + """// this""", + """# this""", + """; this""", + ]), + ("hash_comment", [ + """# this""", + """# this\n""", + """# this\r\n""", + ],[ + """// this""", + """/* this */""", + """/* /* this */ */""", + ]), + ("semicolon_comment", [ + """; this""", + """; this\n""", + """; this\r\n""", + ],[ + """# this""", + """// this""", + """/* this */""", + """/* /* this */ */""", + ]), + ("slashslash_comment", [ + """// this""", + """// this\n""", + """// this\r\n""", + ],[ + """# this""", + """/ this""", + """/* this */""", + """/* /* this */ */""", + ]), ] class CommonTests(unittest.TestCase): - def testBasic( self ): - for production, yestable, notable in parseTests: - p = Parser( "x := %s"%production, 'x') - for data in yestable: - success, results, next = p.parse( data) - assert success and (next == len(data)), """Did not parse comment %s as a %s result=%s"""%( repr(data), production, (success, results, next)) - assert results, """Didn't get any results for comment %s as a %s result=%s"""%( repr(data), production, (success, results, next)) - for data in notable: - success, results, next = p.parse( data) - assert not success, """Parsed %s of %s as a %s result=%s"""%( - next, repr(data), production, results - ) - + def testBasic( self ): + for production, yestable, notable in parseTests: + p = Parser( "x := %s"%production, 'x') + for data in yestable: + success, results, next = p.parse( data) + assert success and (next == len(data)), """Did not parse comment %s as a %s result=%s"""%( repr(data), production, (success, results, next)) + assert results, """Didn't get any results for comment %s as a %s result=%s"""%( repr(data), production, (success, results, next)) + for data in notable: + success, results, next = p.parse( data) + assert not success, """Parsed %s of %s as a %s result=%s"""%( + next, repr(data), production, results + ) + def getSuite(): - return unittest.makeSuite(CommonTests, 'test') + return unittest.makeSuite(CommonTests, 'test') if __name__ == "__main__": - unittest.main(defaultTest="getSuite") + unittest.main(defaultTest="getSuite") diff -Nru simpleparse-2.1.0a1/tests/test_common_iso_date.py simpleparse-2.2.0/tests/test_common_iso_date.py --- simpleparse-2.1.0a1/tests/test_common_iso_date.py 2004-01-31 23:54:48.000000000 +0000 +++ simpleparse-2.2.0/tests/test_common_iso_date.py 2015-11-11 18:42:23.000000000 +0000 @@ -1,122 +1,122 @@ -import unittest, string +import unittest, string, logging from simpleparse.parser import Parser from simpleparse.common import iso_date, iso_date_loose -from mx import DateTime -import time +log = logging.getLogger(__name__) +try: + from mx import DateTime +except ImportError: + log.warn("No mx.DateTime module available") +else: + import time + try: + fulltrans = string.maketrans(b"",b"") + except AttributeError: + fulltrans = bytes.maketrans(b"",b"") + tzOffset = DateTime.DateTimeDelta( 0,0,0, time.timezone ) -fulltrans = string.maketrans("","") -tzOffset = DateTime.DateTimeDelta( 0,0,0, time.timezone ) + class CommonTests(unittest.TestCase): + def testISODateLoose( self ): + """Test the parsing of ISO date and time formats""" + values = [ + ("2002-02-03", DateTime.DateTime( 2002, 2,3)), + ("2002-02",DateTime.DateTime( 2002, 2)), + ("2002",DateTime.DateTime( 2002)), + ("2002-02-03 
04:15", DateTime.DateTime( 2002, 2,3, 4,15)), + ("2002-02-03 04:15:16", DateTime.DateTime( 2002, 2,3, 4,15, 16)), + ("2002-02-03 04:15:16 +00:00", DateTime.DateTime( 2002, 2,3, 4,15, 16)-tzOffset), + ("2002-02-03 4:5", DateTime.DateTime( 2002, 2,3, 4,5)), + ("2002-02-03 4:5:16", DateTime.DateTime( 2002, 2,3, 4,5, 16)), + ("2002-02-03 4:5:16 +00:00", DateTime.DateTime( 2002, 2,3, 4, 5,16)-tzOffset), + ] + p = Parser ("d:= ISO_date_time_loose", "d") + proc = iso_date_loose.MxInterpreter() + for to_parse, date in values: + success, children, next = p.parse( to_parse, processor = proc) + assert success, """Unable to parse any of the string %s with the ISO date-time parser"""% (to_parse) + assert next == len(to_parse),"""Did not finish parsing string %s with the ISO date-time parser, remainder was %s, found was %s"""%( to_parse, to_parse [next:],children) + assert children [0] == date,"""Returned different date for string %s than expected, got %s, expected %s"""% (to_parse,children [0], date) + def testISODate( self ): + """Test the parsing of ISO date and time formats""" + values = [ + ("2002-02-03", DateTime.DateTime( 2002, 2,3)), + ("2002-02",DateTime.DateTime( 2002, 2)), + ("2002",DateTime.DateTime( 2002)), + ("2002-02-03T04:15", DateTime.DateTime( 2002, 2,3, 4,15)), + ("2002-02-03T04:15:16", DateTime.DateTime( 2002, 2,3, 4,15, 16)), + ("2002-02-03T04:15:16+00:00", DateTime.DateTime( 2002, 2,3, 4,15, 16)-tzOffset), + ] + p = Parser ("d:= ISO_date_time", "d") + proc = iso_date.MxInterpreter() + for to_parse, date in values: + success, children, next = p.parse( to_parse, processor=proc) + assert success, """Unable to parse any of the string %s with the ISO date-time parser"""% (to_parse) + assert next == len(to_parse),"""Did not finish parsing string %s with the ISO date-time parser, remainder was %s, found was %s"""%( to_parse, to_parse [next:],children) + assert children [0] == date,"""Returned different date for string %s than expected, got %s, expected %s"""% (to_parse,children [0], date) + def testProductionsStrict( self ): + for to_parse, production in [ + ("2002", "year"), + ("02", "month"), + ("02", "day"), + ("24:00:00", "ISO_time"), + ("02", "ISO_time"), + (":", "time_separator"), + ("02:02", "ISO_time"), + ("02:02:02", "ISO_time"), + ("2002-02-30", "ISO_date"), + ("2002-02-30", "ISO_date_time"), + ("02", "hour"), + ("02", "minute"), + ("02", "second"), + ("20", "second"), -class CommonTests(unittest.TestCase): - def testISODateLoose( self ): - """Test the parsing of ISO date and time formats""" - values = [ - ("2002-02-03", DateTime.DateTime( 2002, 2,3)), - ("2002-02",DateTime.DateTime( 2002, 2)), - ("2002",DateTime.DateTime( 2002)), - ("2002-02-03 04:15", DateTime.DateTime( 2002, 2,3, 4,15)), - ("2002-02-03 04:15:16", DateTime.DateTime( 2002, 2,3, 4,15, 16)), - ("2002-02-03 04:15:16 +00:00", DateTime.DateTime( 2002, 2,3, 4,15, 16)-tzOffset), - ("2002-02-03 4:5", DateTime.DateTime( 2002, 2,3, 4,5)), - ("2002-02-03 4:5:16", DateTime.DateTime( 2002, 2,3, 4,5, 16)), - ("2002-02-03 4:5:16 +00:00", DateTime.DateTime( 2002, 2,3, 4, 5,16)-tzOffset), - ] - p = Parser ("d:= ISO_date_time_loose", "d") - proc = iso_date_loose.MxInterpreter() - for string, date in values: - success, children, next = p.parse( string, processor = proc) - assert success, """Unable to parse any of the string %s with the ISO date-time parser"""% (string) - assert next == len(string),"""Did not finish parsing string %s with the ISO date-time parser, remainder was %s, found was %s"""%( string, string 
[next:],children) - assert children [0] == date,"""Returned different date for string %s than expected, got %s, expected %s"""% (string,children [0], date) - def testISODate( self ): - """Test the parsing of ISO date and time formats""" - values = [ - ("2002-02-03", DateTime.DateTime( 2002, 2,3)), - ("2002-02",DateTime.DateTime( 2002, 2)), - ("2002",DateTime.DateTime( 2002)), - ("2002-02-03T04:15", DateTime.DateTime( 2002, 2,3, 4,15)), - ("2002-02-03T04:15:16", DateTime.DateTime( 2002, 2,3, 4,15, 16)), - ("2002-02-03T04:15:16+00:00", DateTime.DateTime( 2002, 2,3, 4,15, 16)-tzOffset), - ] - p = Parser ("d:= ISO_date_time", "d") - proc = iso_date.MxInterpreter() - for string, date in values: - success, children, next = p.parse( string, processor=proc) - assert success, """Unable to parse any of the string %s with the ISO date-time parser"""% (string) - assert next == len(string),"""Did not finish parsing string %s with the ISO date-time parser, remainder was %s, found was %s"""%( string, string [next:],children) - assert children [0] == date,"""Returned different date for string %s than expected, got %s, expected %s"""% (string,children [0], date) - def testProductionsStrict( self ): - for string, production in [ - ("2002", "year"), - ("02", "month"), - ("02", "day"), - ("24:00:00", "ISO_time"), - ("02", "ISO_time"), - (":", "time_separator"), - ("02:02", "ISO_time"), - ("02:02:02", "ISO_time"), - ("2002-02-30", "ISO_date"), - ("2002-02-30", "ISO_date_time"), - ("02", "hour"), - ("02", "minute"), - ("02", "second"), - ("20", "second"), + ("+0500", "offset"), + ("+00:00", "offset"), + ("-", "offset_sign"), + ("-00:00", "offset"), + ("-04:00", "offset"), + ("-0500", "offset"), + ("02:13", "ISO_time"), + ("02:13:16", "ISO_time"), + ("2002-02-01T02:13-0500", "ISO_date_time"), + ]: + success, children, next = iso_date._p.parse( to_parse,production) + assert next == len(to_parse), "couldn't parse %s as a %s"%( to_parse, production) + + def testProductions2( self ): + for to_parse, production in [ + ("2002", "year"), + ("02", "month"), + ("02", "day"), + ("24:00:00", "ISO_time_loose"), + ("02", "ISO_time_loose"), + (":", "time_separator"), + ("02:02", "ISO_time_loose"), + ("02:02:02", "ISO_time_loose"), + ("2002-02-30", "ISO_date_loose"), + ("2002-02-30", "ISO_date_time_loose"), + ("2002-2-1", "ISO_date_time_loose"), + ("02", "hour"), + ("02", "minute"), + ("2", "second"), + ("02", "second"), + ("20", "second"), + ("20.", "second"), + ("20.3", "second"), - ("+0500", "offset"), - ("+00:00", "offset"), - ("-", "offset_sign"), - ("-00:00", "offset"), - ("-04:00", "offset"), - ("-0500", "offset"), - ("02:13", "ISO_time"), - ("02:13:16", "ISO_time"), - ("2002-02-01T02:13-0500", "ISO_date_time"), - ]: - success, children, next = iso_date._p.parse( string,production) - assert next == len(string), "couldn't parse %s as a %s"%( string, production) - - def testProductions2( self ): - for string, production in [ - ("2002", "year"), - ("02", "month"), - ("02", "day"), - ("24:00:00", "ISO_time_loose"), - ("02", "ISO_time_loose"), - (":", "time_separator"), - ("02:02", "ISO_time_loose"), - ("02:02:02", "ISO_time_loose"), - ("2002-02-30", "ISO_date_loose"), - ("2002-02-30", "ISO_date_time_loose"), - ("2002-2-1", "ISO_date_time_loose"), - ("02", "hour"), - ("02", "minute"), - ("2", "second"), - ("02", "second"), - ("20", "second"), - ("20.", "second"), - ("20.3", "second"), - - ("+0500", "offset"), - ("+00:00", "offset"), - ("-", "offset_sign"), - ("-00:00", "offset"), - ("-04:00", "offset"), - ("-0500", 
"offset"), - ("02:13", "ISO_time_loose"), - ("02:13:16", "ISO_time_loose"), - ("2002-2-1 2:13", "ISO_date_time_loose"), - ("2002-2-1 2:13 -0500", "ISO_date_time_loose"), - ("2002-2-1 2:13 -05:30", "ISO_date_time_loose"), - ("2002-2-1 2:13 +05:30", "ISO_date_time_loose"), - ("2002-2-1 2:13 +00:00", "ISO_date_time_loose"), - - ]: - success, children, next = iso_date_loose._p.parse( string,production ) - assert next == len(string), "couldn't parse %s as a %s"%( string, production) - - -def getSuite(): - return unittest.makeSuite(CommonTests, 'test') - -if __name__ == "__main__": - unittest.main(defaultTest="getSuite") + ("+0500", "offset"), + ("+00:00", "offset"), + ("-", "offset_sign"), + ("-00:00", "offset"), + ("-04:00", "offset"), + ("-0500", "offset"), + ("02:13", "ISO_time_loose"), + ("02:13:16", "ISO_time_loose"), + ("2002-2-1 2:13", "ISO_date_time_loose"), + ("2002-2-1 2:13 -0500", "ISO_date_time_loose"), + ("2002-2-1 2:13 -05:30", "ISO_date_time_loose"), + ("2002-2-1 2:13 +05:30", "ISO_date_time_loose"), + ("2002-2-1 2:13 +00:00", "ISO_date_time_loose"), + + ]: + success, children, next = iso_date_loose._p.parse( to_parse,production ) + assert next == len(to_parse), "couldn't parse %s as a %s"%( to_parse, production) diff -Nru simpleparse-2.1.0a1/tests/test_common_numbers.py simpleparse-2.2.0/tests/test_common_numbers.py --- simpleparse-2.1.0a1/tests/test_common_numbers.py 2002-07-10 05:14:48.000000000 +0000 +++ simpleparse-2.2.0/tests/test_common_numbers.py 2015-11-11 18:42:23.000000000 +0000 @@ -1,184 +1,184 @@ -import unittest, string +import unittest from simpleparse.parser import Parser from simpleparse.common import numbers from simpleparse import dispatchprocessor _data = [ - ( - "int_unsigned", numbers.IntInterpreter, - [ # should match, value, length that should match, expected result - ("0 ", 1, 0), - ("1 ", 1, 1), - ("23 ",2, 23), - ("0x ", 1,0), - ("0. ", 1,0), - ], - [ # should not match... - ".0", - "a", - ], - ), - ( - "int", numbers.IntInterpreter, - [ # should match, value, length that should match, expected result - ("0 ", 1, 0), - ("1 ", 1, 1), - ("23 ",2, 23), - ("0x ", 1,0), - ("0. ", 1,0), - ("+0 ", 2, 0), - ("+1 ", 2, 1), - ("+23 ",3, 23), - ("+0x ", 2,0), - ("+0. ", 2,0), - ("-0 ", 2, 0), - ("-1 ", 2, -1), - ("-23 ",3, -23), - ("-0x ", 2,0), - ("-0. ", 2,0), - ], - [ # should not match... - ".0", - "a", - "+.0", - "+a", - "-.0", - "-a", - ], - ), - ( - "hex", numbers.HexInterpreter, - [ # should match, value, length that should match, expected result - ("0x0 ", 3, 0), - ("0x1 ", 3, 1), - ("0x23 ",4, 35), - ("0x0x ", 3,0), - ("0x0. ", 3,0), - ("+0x0 ", 4, 0), - ("+0x1 ", 4, 1), - ("+0x23 ",5, 35), - ("+0x0x ", 4,0), - ("+0x0. ", 4,0), - ("-0x0 ", 4, 0), - ("-0x1 ", 4, -1), - ("-0x23 ",5, -35), - ("-0x0x ", 4,0), - ("-0x0. ", 4,0), - ("0xa ", 3, 10), - ("0xaaaaaaaaaaaaaaaaa ", 19, 196765270119568550570L), - ("0xA ", 3, 10), - ("0xAAAAAAAAAAAAAAAAA ", 19, 196765270119568550570L), - ], - [ # should not match... - ".0", - "a", - "+.0", - "+a", - "-.0", - "-a", - "0x ", - "0xg", - "0x", - ], - ), - ( - "binary_number", numbers.BinaryInterpreter, - [ # should match, value, length that should match, expected result - ("0b0 ", 2, 0), - ("1b0 ", 2, 1), - ("10b0 ", 3, 2), - ("10000000000b0 ", 12, 1024), - ("0B0 ", 2, 0), - ("1B0 ", 2, 1), - ("10B0 ", 3, 2), - ("10000000000B0 ", 12, 1024), - ], - [ # should not match... 
- ".0", - "a", - "+.0", - "+a", - "-.0", - "-a", - "0x ", - "0xg", - "0x", - ], - ), - ( - "float", numbers.FloatInterpreter, - [ # should match, value, length that should match, expected result - ("0. ", 2, 0), - ("1. ", 2, 1), - ("23. ",3, 23), - (".0 ", 2, 0), - (".1 ", 2, .1), - (".23 ",3, .23), - ("0.0x ", 3,0), - ("1.1x ", 3,1.1), - ("2000000.22222222x ", 16, 2000000.22222222), - ("1.1e20 ", 6, 1.1e20), - ("1.1e-20 ",7, 1.1e-20), - ("-1.1e20 ", 7, -1.1e20), - ], - [ # should not match... - "0x.0", - "23", - "-23", - "-43*2a", - "+23", - "-a", - ], - ), - ( - "float_floatexp", numbers.FloatFloatExpInterpreter, - [ # should match, value, length that should match, expected result - ("0. ", 2, 0), - ("1. ", 2, 1), - ("23. ",3, 23), - (".0 ", 2, 0), - (".1 ", 2, .1), - (".23 ",3, .23), - ("0.0x ", 3,0), - ("1.1x ", 3,1.1), - ("2000000.22222222x ", 16, 2000000.22222222), - ("1.1e20 ", 6, 1.1* (1e20)), - ("1.1e-20 ",7, 1.1* (1e-20)), - ("-1.1e20 ", 7, -1.1* (1e20)), - ("1.1e20.34 ", 9, 1.1* (10 ** 20.34)), - ("1.1e-.34 ", 8, 1.1*( 10 ** -.34)), - ], - [ # should not match... - "0x.0", - "23", - "-23", - "-43*2a", - "+23", - "-a", - ], - ), + ( + "int_unsigned", numbers.IntInterpreter, + [ # should match, value, length that should match, expected result + ("0 ", 1, 0), + ("1 ", 1, 1), + ("23 ",2, 23), + ("0x ", 1,0), + ("0. ", 1,0), + ], + [ # should not match... + ".0", + "a", + ], + ), + ( + "int", numbers.IntInterpreter, + [ # should match, value, length that should match, expected result + ("0 ", 1, 0), + ("1 ", 1, 1), + ("23 ",2, 23), + ("0x ", 1,0), + ("0. ", 1,0), + ("+0 ", 2, 0), + ("+1 ", 2, 1), + ("+23 ",3, 23), + ("+0x ", 2,0), + ("+0. ", 2,0), + ("-0 ", 2, 0), + ("-1 ", 2, -1), + ("-23 ",3, -23), + ("-0x ", 2,0), + ("-0. ", 2,0), + ], + [ # should not match... + ".0", + "a", + "+.0", + "+a", + "-.0", + "-a", + ], + ), + ( + "hex", numbers.HexInterpreter, + [ # should match, value, length that should match, expected result + ("0x0 ", 3, 0), + ("0x1 ", 3, 1), + ("0x23 ",4, 35), + ("0x0x ", 3,0), + ("0x0. ", 3,0), + ("+0x0 ", 4, 0), + ("+0x1 ", 4, 1), + ("+0x23 ",5, 35), + ("+0x0x ", 4,0), + ("+0x0. ", 4,0), + ("-0x0 ", 4, 0), + ("-0x1 ", 4, -1), + ("-0x23 ",5, -35), + ("-0x0x ", 4,0), + ("-0x0. ", 4,0), + ("0xa ", 3, 10), + ("0xaaaaaaaaaaaaaaaaa ", 19, 196765270119568550570), + ("0xA ", 3, 10), + ("0xAAAAAAAAAAAAAAAAA ", 19, 196765270119568550570), + ], + [ # should not match... + ".0", + "a", + "+.0", + "+a", + "-.0", + "-a", + "0x ", + "0xg", + "0x", + ], + ), + ( + "binary_number", numbers.BinaryInterpreter, + [ # should match, value, length that should match, expected result + ("0b0 ", 2, 0), + ("1b0 ", 2, 1), + ("10b0 ", 3, 2), + ("10000000000b0 ", 12, 1024), + ("0B0 ", 2, 0), + ("1B0 ", 2, 1), + ("10B0 ", 3, 2), + ("10000000000B0 ", 12, 1024), + ], + [ # should not match... + ".0", + "a", + "+.0", + "+a", + "-.0", + "-a", + "0x ", + "0xg", + "0x", + ], + ), + ( + "float", numbers.FloatInterpreter, + [ # should match, value, length that should match, expected result + ("0. ", 2, 0), + ("1. ", 2, 1), + ("23. ",3, 23), + (".0 ", 2, 0), + (".1 ", 2, .1), + (".23 ",3, .23), + ("0.0x ", 3,0), + ("1.1x ", 3,1.1), + ("2000000.22222222x ", 16, 2000000.22222222), + ("1.1e20 ", 6, 1.1e20), + ("1.1e-20 ",7, 1.1e-20), + ("-1.1e20 ", 7, -1.1e20), + ], + [ # should not match... + "0x.0", + "23", + "-23", + "-43*2a", + "+23", + "-a", + ], + ), + ( + "float_floatexp", numbers.FloatFloatExpInterpreter, + [ # should match, value, length that should match, expected result + ("0. ", 2, 0), + ("1. 
", 2, 1), + ("23. ",3, 23), + (".0 ", 2, 0), + (".1 ", 2, .1), + (".23 ",3, .23), + ("0.0x ", 3,0), + ("1.1x ", 3,1.1), + ("2000000.22222222x ", 16, 2000000.22222222), + ("1.1e20 ", 6, 1.1* (1e20)), + ("1.1e-20 ",7, 1.1* (1e-20)), + ("-1.1e20 ", 7, -1.1* (1e20)), + ("1.1e20.34 ", 9, 1.1* (10 ** 20.34)), + ("1.1e-.34 ", 8, 1.1*( 10 ** -.34)), + ], + [ # should not match... + "0x.0", + "23", + "-23", + "-43*2a", + "+23", + "-a", + ], + ), ] - + class CommonTests(unittest.TestCase): - def testBasic( self ): - for production, processor, yestable, notable in _data: - p = Parser( "x := %s"%production, 'x') - proc = dispatchprocessor.DispatchProcessor() - setattr(proc, production, processor()) - for data, length, value in yestable: - success, results, next = p.parse( data, processor = proc) - assert next == length, """Did not parse string %s of %s as a %s result=%s"""%( repr(data[:length]), repr(data), production, (success, results, next)) - assert results[0] == value, """Didn't get expected value from processing value %s, expected %s, got %s"""%( data[:length], value, results[0]) - - for data in notable: - success, results, next = p.parse( data) - assert not success, """Parsed %s of %s as a %s result=%s"""%( repr(data[:length]), repr(data), production, (success, results, next)) - - - + def testBasic( self ): + for production, processor, yestable, notable in _data: + p = Parser( "x := %s"%production, 'x') + proc = dispatchprocessor.DispatchProcessor() + setattr(proc, production, processor()) + for data, length, value in yestable: + success, results, next = p.parse( data, processor = proc) + assert next == length, """Did not parse string %s of %s as a %s result=%s"""%( repr(data[:length]), repr(data), production, (success, results, next)) + assert results[0] == value, """Didn't get expected value from processing value %s, expected %s, got %s"""%( data[:length], value, results[0]) + + for data in notable: + success, results, next = p.parse( data) + assert not success, """Parsed %s of %s as a %s result=%s"""%( repr(data[:length]), repr(data), production, (success, results, next)) + + + def getSuite(): - return unittest.makeSuite(CommonTests, 'test') + return unittest.makeSuite(CommonTests, 'test') if __name__ == "__main__": - unittest.main(defaultTest="getSuite") + unittest.main(defaultTest="getSuite") diff -Nru simpleparse-2.1.0a1/tests/test_common_strings.py simpleparse-2.2.0/tests/test_common_strings.py --- simpleparse-2.1.0a1/tests/test_common_strings.py 2002-08-07 00:12:07.000000000 +0000 +++ simpleparse-2.2.0/tests/test_common_strings.py 2015-11-11 18:42:23.000000000 +0000 @@ -1,66 +1,66 @@ -import unittest, string +import unittest from simpleparse.parser import Parser from simpleparse.common import strings from simpleparse import dispatchprocessor parseTests = [ - # each production should match the whole of all of the first, - # and not match any of the second... - ("string_triple_single", [ - """'''this and that'''""", - """'''this \\''' '''""", - """''''''""", - """''''\\''''""", - ],[]), - ("string_triple_double", [ - '''"""this and that"""''', - '''"""this \\""" """''', - '''""""""''', - '''""""\\""""''', - ],[]), - ("string_double_quote", [ - '"\\p"', - '"\\""', - ],[]), - ("string",[ - "'this'", - '"that"', - r'"\b\f\n\r"', - r'"\x32\xff\xcf"', - r'"\032\033\055\077"', - r'"\t\v\\\a\b\f\n\r"', - r'"\t"', - r'"\v"', - r'"\""', - ], []), + # each production should match the whole of all of the first, + # and not match any of the second... 
+ ("string_triple_single", [ + """'''this and that'''""", + """'''this \\''' '''""", + """''''''""", + """''''\\''''""", + ],[]), + ("string_triple_double", [ + '''"""this and that"""''', + '''"""this \\""" """''', + '''""""""''', + '''""""\\""""''', + ],[]), + ("string_double_quote", [ + '"\\p"', + '"\\""', + ],[]), + ("string",[ + "'this'", + '"that"', + r'"\b\f\n\r"', + r'"\x32\xff\xcf"', + r'"\032\033\055\077"', + r'"\t\v\\\a\b\f\n\r"', + r'"\t"', + r'"\v"', + r'"\""', + ], []), ] class CommonTests(unittest.TestCase): - def testBasic( self ): - proc = dispatchprocessor.DispatchProcessor() - setattr(proc, "string", strings.StringInterpreter()) - for production, yestable, notable in parseTests: - p = Parser( "x := %s"%production, 'x') - for data in yestable: - if production == 'string': - success, results, next = p.parse( data, processor=proc) - else: - success, results, next = p.parse( data) - assert success and (next == len(data)), """Did not parse string %s as a %s result=%s"""%( repr(data), production, (success, results, next)) - assert results, """Didn't get any results for string %s as a %s result=%s"""%( repr(data), production, (success, results, next)) - if production == 'string': - expected = eval( data, {},{}) - assert results[0] == expected, """Got different interpreted value for data %s, we got %s, expected %s"""%( repr(data), repr(results[0]), repr(expected)) - for data in notable: - success, results, next = p.parse( data) - assert not success, """Parsed %s of %s as a %s result=%s"""%( repr(data), production, (success, results, next)) - - - + def testBasic( self ): + proc = dispatchprocessor.DispatchProcessor() + setattr(proc, "string", strings.StringInterpreter()) + for production, yestable, notable in parseTests: + p = Parser( "x := %s"%production, 'x') + for data in yestable: + if production == 'string': + success, results, next = p.parse( data, processor=proc) + else: + success, results, next = p.parse( data) + assert success and (next == len(data)), """Did not parse string %s as a %s result=%s"""%( repr(data), production, (success, results, next)) + assert results, """Didn't get any results for string %s as a %s result=%s"""%( repr(data), production, (success, results, next)) + if production == 'string': + expected = eval( data, {},{}) + assert results[0] == expected, """Got different interpreted value for data %s, we got %s, expected %s"""%( repr(data), repr(results[0]), repr(expected)) + for data in notable: + success, results, next = p.parse( data) + assert not success, """Parsed %s of %s as a %s result=%s"""%( repr(data), production, (success, results, next)) + + + def getSuite(): - return unittest.makeSuite(CommonTests, 'test') + return unittest.makeSuite(CommonTests, 'test') if __name__ == "__main__": - unittest.main(defaultTest="getSuite") + unittest.main(defaultTest="getSuite") diff -Nru simpleparse-2.1.0a1/tests/test_deep_nesting.py simpleparse-2.2.0/tests/test_deep_nesting.py --- simpleparse-2.1.0a1/tests/test_deep_nesting.py 2006-02-18 23:15:13.000000000 +0000 +++ simpleparse-2.2.0/tests/test_deep_nesting.py 2015-11-11 19:01:43.000000000 +0000 @@ -1,7 +1,8 @@ +from __future__ import print_function + from simpleparse.simpleparsegrammar import Parser from simpleparse.stt.TextTools import TextTools -import pprint -from genericvalues import NullResult, AnyInt +from .genericvalues import NullResult declaration = r'''testparser := as? 
@@ -10,29 +11,29 @@ ''' testdata = 'aaaa' expectedResult = (1, [ - ('as', 0, 4, [ - ('a', 0, 1, NullResult), - ('as', 1, 4, [ - ('a', 1, 2, NullResult), - ('as', 2, 4, [ - ('a', 2, 3, NullResult), - ('as', 3, 4, [ - ('a', 3, 4, NullResult) - ]) - ]) - ]) - ]) + ('as', 0, 4, [ + ('a', 0, 1, NullResult), + ('as', 1, 4, [ + ('a', 1, 2, NullResult), + ('as', 2, 4, [ + ('a', 2, 3, NullResult), + ('as', 3, 4, [ + ('a', 3, 4, NullResult) + ]) + ]) + ]) + ]) ], 4) parser = Parser( declaration ).generator.buildParser( 'testparser' ) -print "About to attempt the deep-nesting test" -print "If python goes into an infinite loop, then the test failed ;) " -print +print("About to attempt the deep-nesting test") +print("If python goes into an infinite loop, then the test failed ;) ") +print() result = TextTools.tag( testdata, parser ) if result != expectedResult: - print 'test-deep-nesting failed' - print '\texpected', expectedResult - print '\tgot', result + print('test-deep-nesting failed') + print('\texpected', expectedResult) + print('\tgot', result) else: - print "test-deep-nesting succeeded!\nYou're probably using the non-recursive mx.TextTools rewrite" + print("test-deep-nesting succeeded!\nYou're probably using the non-recursive mx.TextTools rewrite") diff -Nru simpleparse-2.1.0a1/tests/test_erroronfail.py simpleparse-2.2.0/tests/test_erroronfail.py --- simpleparse-2.1.0a1/tests/test_erroronfail.py 2003-12-27 00:11:26.000000000 +0000 +++ simpleparse-2.2.0/tests/test_erroronfail.py 2015-11-11 18:42:23.000000000 +0000 @@ -3,113 +3,113 @@ from simpleparse.error import ParserSyntaxError class ErrorOnFailTests( unittest.TestCase ): - """Tests of the error-on failure mechanisms""" - def shouldRaise(self, definition, parserName, testValue, ): - self.failUnlessRaises( ParserSyntaxError, Parser( definition).parse, testValue, parserName ) - def shouldNotRaise(self, definition, parserName, testValue, ): - success,result, next = Parser( definition).parse( testValue, parserName ) - assert success, """Didn't parse %s (error on fail test for definition %s)"""%( repr(testValue), repr(definition)) - + """Tests of the error-on failure mechanisms""" + def shouldRaise(self, definition, parserName, testValue, ): + self.assertRaises( ParserSyntaxError, Parser( definition).parse, testValue, parserName ) + def shouldNotRaise(self, definition, parserName, testValue, ): + success,result, next = Parser( definition).parse( testValue, parserName ) + assert success, """Didn't parse %s (error on fail test for definition %s)"""%( repr(testValue), repr(definition)) + - def testErrorOnFail1( self ): - self.shouldRaise( - '''s := -trailer! - trailer := "bad" - ''', - 's', - 'badba', - ) - def testErrorOnFail2( self ): - self.shouldRaise( - '''s := -"bad"! - ''', - 's', - 'badba', - ) - def testErrorOnFail3( self ): - self.shouldRaise( - '''s := -(a,b)! - a := "a" - b := "b" - ''', - 's', - 'abdba', - ) - def testErrorOnFail4( self ): - self.shouldRaise( - '''s := -[ab]! - ''', - 's', - 'abdba', - ) - - def testErrorOnFail5( self ): - self.shouldRaise( - '''s := !,'a','b' - ''', - 's', - 'badba', - ) - def testErrorOnFail6( self ): - self.shouldNotRaise( - '''s := 'a',!,'b' - ''', - 's', - 'abdba', - ) - def testErrorOnFail7( self ): - self.shouldNotRaise( - '''s := 'a',!,'b'? 
- ''', - 's', - 'acbdba', - ) - def testErrorOnFail8( self ): - self.shouldRaise( - '''s := 'a',!,'b' - ''', - 's', - 'acbdba', - ) - def testErrorOnFail9( self ): - self.shouldRaise( - '''s := !,'a','b' - ''', - 's', - 'bcbdba', - ) - def testErrorOnFail10( self ): - """Test for use of setting message in definition""" - self.shouldRaise( - '''s := 'a',! "Blargh!",'b' - ''', - 's', - 'acbdba', - ) - def testErrorOnFail11( self ): - """Test proper setting of err message text from !"message" syntax""" - try: - Parser( '''s := 'a',! "Blargh!",'b' - ''', 's' ).parse( - 'acbdba', - ) - except ParserSyntaxError, err: - assert err.message == "Blargh!", """Error message was %r, should have been "Blargh!"."""%(err.message,) - def testErrorOnFail12( self ): - """Test proper setting of err message text from !"message" syntax""" - try: - Parser( '''s := 'a',! "Blargh!",'b' - ''', 's' ).parse( - 'acbdba', - ) - except ParserSyntaxError, err: - description = str( err ) - assert description == 'ParserSyntaxError: Blargh!', """Didn't get expected error description, got: %s"""%( - str(err), - ) + def testErrorOnFail1( self ): + self.shouldRaise( + '''s := -trailer! + trailer := "bad" + ''', + 's', + 'badba', + ) + def testErrorOnFail2( self ): + self.shouldRaise( + '''s := -"bad"! + ''', + 's', + 'badba', + ) + def testErrorOnFail3( self ): + self.shouldRaise( + '''s := -(a,b)! + a := "a" + b := "b" + ''', + 's', + 'abdba', + ) + def testErrorOnFail4( self ): + self.shouldRaise( + '''s := -[ab]! + ''', + 's', + 'abdba', + ) + + def testErrorOnFail5( self ): + self.shouldRaise( + '''s := !,'a','b' + ''', + 's', + 'badba', + ) + def testErrorOnFail6( self ): + self.shouldNotRaise( + '''s := 'a',!,'b' + ''', + 's', + 'abdba', + ) + def testErrorOnFail7( self ): + self.shouldNotRaise( + '''s := 'a',!,'b'? + ''', + 's', + 'acbdba', + ) + def testErrorOnFail8( self ): + self.shouldRaise( + '''s := 'a',!,'b' + ''', + 's', + 'acbdba', + ) + def testErrorOnFail9( self ): + self.shouldRaise( + '''s := !,'a','b' + ''', + 's', + 'bcbdba', + ) + def testErrorOnFail10( self ): + """Test for use of setting message in definition""" + self.shouldRaise( + '''s := 'a',! "Blargh!",'b' + ''', + 's', + 'acbdba', + ) + def testErrorOnFail11( self ): + """Test proper setting of err message text from !"message" syntax""" + try: + Parser( '''s := 'a',! "Blargh!",'b' + ''', 's' ).parse( + 'acbdba', + ) + except ParserSyntaxError as err: + assert err.args[0] == "Blargh!", """Error message was %r, should have been "Blargh!"."""%(err.args[0],) + def testErrorOnFail12( self ): + """Test proper setting of err message text from !"message" syntax""" + try: + Parser( '''s := 'a',! 
"Blargh!",'b' + ''', 's' ).parse( + 'acbdba', + ) + except ParserSyntaxError as err: + description = str( err ) + assert description == 'ParserSyntaxError: Blargh!', """Didn't get expected error description, got: %s"""%( + str(err), + ) def getSuite(): - return unittest.makeSuite(ErrorOnFailTests,'test') + return unittest.makeSuite(ErrorOnFailTests,'test') if __name__ == "__main__": - unittest.main(defaultTest="getSuite") + unittest.main(defaultTest="getSuite") \ No newline at end of file diff -Nru simpleparse-2.1.0a1/tests/test_grammarparser.py simpleparse-2.2.0/tests/test_grammarparser.py --- simpleparse-2.1.0a1/tests/test_grammarparser.py 2006-02-18 23:15:27.000000000 +0000 +++ simpleparse-2.2.0/tests/test_grammarparser.py 2015-11-11 19:01:43.000000000 +0000 @@ -1,862 +1,866 @@ """Tests that simpleparsegrammar does parse SimpleParse grammars """ - -import unittest, pprint +import unittest,pprint from simpleparse.simpleparsegrammar import SPGenerator, declaration from simpleparse.parser import Parser -from simpleparse.error import ParserSyntaxError from simpleparse.stt.TextTools import TextTools -from genericvalues import NullResult, AnyInt +from .genericvalues import NullResult, AnyInt from simpleparse.stt.TextTools import print_tagtable print_tagtable( - SPGenerator.buildParser( 'range' ) + SPGenerator.buildParser( 'range' ) ) class SimpleParseGrammarTests(unittest.TestCase): - """Test parsing of the the simpleparse grammar elements""" - def doBasicTest(self, parserName, testValue, expected, ): - parser = SPGenerator.buildParser( parserName ) - result = TextTools.tag( testValue, parser ) - assert result == expected, '''\nexpected:%s\n got:%s\n'''%( expected, result ) - def testChar1( self ): - self.doBasicTest( - "CHARNODBLQUOTE", - 'test\\""', - (1, [], 4), - ) - def testChar2( self ): - self.doBasicTest( - "ESCAPEDCHAR", - '\\n"', - (1, [('SPECIALESCAPEDCHAR', 1, 2, NullResult)], 2), - ) - def testChar3( self ): - self.doBasicTest( - "ESCAPEDCHAR", - '\\007"', - (1, [('OCTALESCAPEDCHAR', 1, 4, NullResult)], 4), - ) - def testChar4( self ): - testValue = '\\""' - self.doBasicTest( - "CHARNODBLQUOTE", - testValue, - (0, [], AnyInt), - ) - def testChar5( self ): - self.doBasicTest( - "CHARNODBLQUOTE", - 'ehllo\\""', - (1, [], 5), - ) - def testChar6( self ): - self.doBasicTest( - "CHARNODBLQUOTE", - '007', - (1, [], 3), - ) - def testChar7( self ): - self.doBasicTest( - "ESCAPEDCHAR", - '\\"', - (1, [('SPECIALESCAPEDCHAR', 1, 2, NullResult)], 2), - ) - def testChar8( self ): - self.doBasicTest( - "ESCAPEDCHAR", - '\\"', - (1, [('SPECIALESCAPEDCHAR', 1, 2, NullResult)], 2), - ) - def testChar9( self ): - self.doBasicTest( - "ESCAPEDCHAR", - '\\x10', - (1, [('HEXESCAPEDCHAR', 2, 4, NullResult)], 4), - ) - def testChar85( self ): - self.doBasicTest( - "HEXESCAPEDCHAR", - '10', - (1, [], 2), - ) - def testCharNoBrace1( self ): - self.doBasicTest( - "CHARNOBRACE", - 'a-z', - (1, [('CHAR', 0, 1, NullResult)], 1), - ) - def testCharRange1( self ): - self.doBasicTest( - "CHARRANGE", - 'a-z', - (1, [('CHARNOBRACE', 0, 1, [('CHAR', 0, 1, NullResult)]),('CHARNOBRACE', 2, 3, [('CHAR', 2, 3, NullResult)])], 3), - ) - def testRange1( self ): - self.doBasicTest( - "range", - '[a-zA-Z]', - (1, [ - ('CHARRANGE',1,4,[ - ('CHARNOBRACE', 1, 2, [('CHAR', 1, 2, NullResult)]), - ('CHARNOBRACE', 3, 4, [('CHAR', 3, 4, NullResult)]), - ]), - ('CHARRANGE',4,7,[ - ('CHARNOBRACE', 4, 5, [('CHAR', 4, 5, NullResult)]), - ('CHARNOBRACE', 6, 7, [('CHAR', 6, 7, NullResult)]), - ]), - ], 8) - ) - def testRange2( self ): 
- self.doBasicTest( - "range", - '[-a-zA-Z]', - (1, [ - ('CHARDASH', 1, 2, NullResult), - ('CHARRANGE',2,5,[ - ('CHARNOBRACE', 2, 3, [('CHAR', 2, 3, NullResult)]), - ('CHARNOBRACE', 4, 5, [('CHAR', 4, 5, NullResult)]), - ]), - ('CHARRANGE',5,8,[ - ('CHARNOBRACE', 5, 6, [('CHAR', 5, 6, NullResult)]), - ('CHARNOBRACE', 7, 8, [('CHAR', 7, 8, NullResult)]), - ]), - ], 9), - ) - def testRange3( self ): - self.doBasicTest( - "range", - '[]a-zA-Z]', - (1, [ - ('CHARBRACE', 1, 2, NullResult), - ('CHARRANGE',2,5,[ - ('CHARNOBRACE', 2, 3, [('CHAR', 2, 3, NullResult)]), - ('CHARNOBRACE', 4, 5, [('CHAR', 4, 5, NullResult)]), - ]), - ('CHARRANGE',5,8,[ - ('CHARNOBRACE', 5, 6, [('CHAR', 5, 6, NullResult)]), - ('CHARNOBRACE', 7, 8, [('CHAR', 7, 8, NullResult)]), - ]), - ], 9), - ) - - def testRange4( self ): - """Test optional repeating children running into eof - - Original SimpleParse had a major failure here, - system hung trying to parse the [] string. Basically, - there was no check for EOF during a repeating-item - parse (save for literals and character sets), so you - wound up with infinite loops. - """ - self.doBasicTest( - "range", - '[]', - (0, [], AnyInt), - ) - def testRange5( self ): - """Test optional repeating children with no termination - - Original SimpleParse had a major failure here, - system hung trying to parse the [] string. Basically, - there was no check for EOF during a repeating-item - parse (save for literals and character sets), so you - wound up with infinite loops. - """ - self.doBasicTest( - "range", - '[] ', - (0, [], AnyInt), - ) - - def testLiteral1( self ): - self.doBasicTest( - "literal", - '"test"', - (1, [('CHARNODBLQUOTE', 1, 5, NullResult)], 6), - ) - def testLiteral2( self ): - self.doBasicTest( - "literal", - '"test\\""', - (1, [ - ('CHARNODBLQUOTE', 1, 5, NullResult), - ('ESCAPEDCHAR', 5, 7, [ - ('SPECIALESCAPEDCHAR', 6, 7, NullResult) - ]) - ], 8) - - ) - def testLiteral3( self ): - self.doBasicTest( - "literal", - '""', - (1, [], 2), - ) - def testLiteral4( self ): - self.doBasicTest( - "literal", - '"\'"', - (1, [('CHARNODBLQUOTE', 1, 2, NullResult),], 3), - ) - def testLiteral5( self ): - self.doBasicTest( - "literal", - '"\\"test"', - (1, [ - ('ESCAPEDCHAR', 1, 3, [ - ('SPECIALESCAPEDCHAR', 2, 3, NullResult) - ]), - ('CHARNODBLQUOTE', 3, 7, NullResult) - ], 8) - ) - def testLiteral6( self ): - self.doBasicTest( - "literal", - '"test\\023""', - (1, [ - ('CHARNODBLQUOTE', 1, 5, NullResult), - ('ESCAPEDCHAR', 5, 9, [ - ('OCTALESCAPEDCHAR', 6, 9, NullResult) - ]) - ], 10) - - ) - def testLiteralDecorator( self ): - self.doBasicTest( - "literalDecorator", - 'c', - (1, [], 1), - ) - def testLiteralDecorator2( self ): - self.doBasicTest( - "literal", - 'c"this"', - (1, [('literalDecorator',0,1,NullResult),('CHARNODBLQUOTE',2,6,NullResult)], 7), - ) - def testLiteralDecorator3( self ): - """Decorator must be right next to literal, no whitespace""" - self.doBasicTest( - "literal", - 'c "this"', - (0, [], AnyInt), - ) - - def testWhitespace1( self ): - self.doBasicTest( - "ts", - ' \t', - (1, [], 3) - ) - def testWhitespace2( self ): - self.doBasicTest( - "ts", - ' \t\n', - (1, [], 4) - ) - def testWhitespace3( self ): - self.doBasicTest( - "ts", - ' \t#testing\r\n', - (1, [('comment', 3, 13, NullResult)], 13) - ) - def testWhitespace4( self ): - self.doBasicTest( - "ts", - 'nospace', - (1, [], 0) - ) - def testWhitespace5( self ): - """Bug in 2.0.0 where Null comments such as: - "#\n" - - didn't parse. 
- """ - self.doBasicTest( - "ts", - ' #\n ', - (1, [('comment',1,3,NullResult)], 4) - ) - - def testName1( self ): - self.doBasicTest( - "name", - 'abcdefg', - (1, [], 7) - ) - def testName2( self ): - self.doBasicTest( - "name", - '2abcdefg', - (0, [], AnyInt) - ) - def testName3( self ): - self.doBasicTest( - "name", - '_abcdefg_-', - (1, [], 9) - ) - - def testUnreportedName1( self ): - self.doBasicTest( - "unreportedname", - '', - (1, [('name',1,8,NullResult)], 9) - ) - def testUnreportedName2( self ): - self.doBasicTest( - "unreportedname", - '<>', - (0, [], AnyInt) - ) - def testExpandedName1( self ): - self.doBasicTest( - "expandedname", - '>abcdefg<', - (1, [('name',1,8,NullResult)], 9) - ) - def testExpandedName2( self ): - self.doBasicTest( - "expandedname", - '><', - (0, [], AnyInt) - ) - def testComment1( self ): - self.doBasicTest( - "comment", - '>', - (0, [], AnyInt) - ) - def testComment2( self ): - self.doBasicTest( - "comment", - '#testing\n', - (1, [], 9) - ) - def testOccurenceIndicator1( self ): - self.doBasicTest( - "occurence_indicator", - '*', - (1, [], 1) - ) - def testOccurenceIndicator2( self ): - self.doBasicTest( - "occurence_indicator", - '+', - (1, [], 1) - ) - def testOccurenceIndicator3( self ): - self.doBasicTest( - "occurence_indicator", - '?', - (1, [], 1) - ) - def testOccurenceIndicator4( self ): - self.doBasicTest( - "occurence_indicator", - 'hello', - (0, [], AnyInt) - ) - def testOccurenceIndicator5( self ): - self.doBasicTest( - "occurence_indicator", - '', - (0, [], AnyInt) - ) - - def testLookAheadIndicator1( self ): - self.doBasicTest( - "lookahead_indicator", - '?', - (1, [], 1) - ) - def testLookAheadIndicator2( self ): - self.doBasicTest( - "lookahead_indicator", - '', - (0, [], AnyInt) - ) - - def testNegposIndicator1( self ): - self.doBasicTest( - "negpos_indicator", - '-', - (1, [], 1) - ) - def testNegposIndicator2( self ): - self.doBasicTest( - "negpos_indicator", - '+', - (1, [], 1) - ) - def testNegposIndicator2( self ): - self.doBasicTest( - "negpos_indicator", - ')', - (0, [], AnyInt) - ) - def testErrorOnFailFlag1( self ): - self.doBasicTest( - "error_on_fail", - '!', - (1, [], 1) - ) - def testFOGroup1( self ): - self.doBasicTest( - "fo_group", - 'a/b', - (1, [ - ('element_token', 0,1,[ - ("name",0,1,NullResult), - ]), - ('element_token', 2,3,[ - ("name",2,3,NullResult), - ]), - ], 3) - ) - def testSEQToken1( self ): - self.doBasicTest( - "seq_group", - 'a,b', - (1, [ - ('element_token', 0,1,[ - ("name",0,1,NullResult), - ]), - ('element_token', 2,3,[ - ("name",2,3,NullResult), - ]), - ], 3) - ) - def testSEQGroup1( self ): - self.doBasicTest( - "seq_group", - 'a,#c\012b', - (1, [ - ('element_token', 0,1,[ - ("name",0,1,NullResult), - ]), - ('element_token', 5,6,[ - ("name",5,6,NullResult), - ]), - ], 6) - ) - def testSeqGroup2( self ): - self.doBasicTest( - "seq_group", - 'ts, (unreportedname/expandedname/name)', - (1, [ - ('element_token', 0,2,[ - ("name",0,2,NullResult), - ]), - ('element_token', 4,38,[ - ('seq_group',5,37,[ - ('fo_group',5,37,[ - ('element_token', 7,8,[ - ("name",7,8,NullResult), - ]) - ]), - ]), - ]), - ], 38) - ) - def testSeqGroup2( self ): - self.doBasicTest( - "seq_group", - '(a/b/c)', - (1, [ - ('element_token',0,7,[ - ('seq_group',1,6,[ - ('fo_group',1,6,[ - ('element_token', 1,2,[ - ("name",1,2,NullResult), - ]), - ('element_token', 3,4,[ - ("name",3,4,NullResult), - ]), - ('element_token', 5,6,[ - ("name",5,6,NullResult), - ]), - ]), - ]), - ]), - ], 7) - ) - def testGroup1( self ): - self.doBasicTest( 
- "group", - '()', - (0, [], AnyInt) - ) - def testGroup2( self ): - self.doBasicTest( - "group", - '(hello)', - (1, [ - ('seq_group',1,6,[ - ('element_token', 1,6,[ - ("name",1,6,NullResult), - ]), - ]), - ], 7) - ) - def testGroup3( self ): - '''Test group with sequential added group - Note that this test also serves to test - the function of non-reporting names''' - self.doBasicTest( - "group", - '(hello, there)', - (1, [ - ('seq_group', 1,13,[ - ('element_token', 1,6,[ - ("name",1,6,NullResult), - ]), - ('element_token', 8,13,[ - ("name",8,13,NullResult), - ]), - ]), - ], 14) - ) - def testGroup4( self ): - '''Test group with sequential added group - Note that this test also serves to test - the function of non-reporting names''' - self.doBasicTest( - "group", - '(hello/there)', - (1, [ - ('seq_group',1,12,[ - ('fo_group',1,12,[ - ('element_token', 1,6,[ - ("name",1,6,NullResult), - ]), - ('element_token', 7,12,[ - ("name",7,12,NullResult), - ]), - ]), - ]), - ], 13) - ) - def testGroup5( self ): - '''Test group with sequential added group - Note that this test also serves to test - the function of non-reporting names''' - self.doBasicTest( - "group", - '([the]/"and")', - (1, [ - ('seq_group',1,12,[ - ('fo_group',1,12,[ - ('element_token', 1,6,[ - ("range",1,6,[ - ('CHARNOBRACE', 2,3,[ # this should really be a collapsed level - ('CHAR', 2,3,NullResult), - ]), - ('CHARNOBRACE', 3,4,[ # this should really be a collapsed level - ('CHAR', 3,4,NullResult), - ]), - ('CHARNOBRACE', 4,5,[ # this should really be a collapsed level - ('CHAR', 4,5,NullResult), - ]), - ]), - ]), - ('element_token', 7,12,[ - ("literal",7,12,[ - ('CHARNODBLQUOTE', 8,11,NullResult), - ]), - ]), - ]), - ]), - ], 13) - ) - def testGroup6( self ): - '''Test group with multiple / 'd values''' - self.doBasicTest( - "group", - '(hello/there/a)', - (1, [ - ('seq_group',1,14,[ - ('fo_group',1,14,[ - ('element_token', 1,6,[ - ("name",1,6,NullResult), - ]), - ('element_token', 7,12,[ - ("name",7,12,NullResult), - ]), - ('element_token', 13,14,[ - ("name",13,14,NullResult), - ]), - ]), - ]), - ], 15) - ) - def testElementToken1( self ): - self.doBasicTest( - "element_token", - 'hello', - (1, [ - ("name",0,5,NullResult), - ], 5) - ) - def testElementToken2( self ): - self.doBasicTest( - "element_token", - '-hello', - (1, [ - ("negpos_indicator",0,1,NullResult), - ("name",1,6,NullResult), - ], 6) - ) - def testElementToken3( self ): - self.doBasicTest( - "element_token", - '-hello?', - (1, [ - ("negpos_indicator",0,1,NullResult), - ("name",1,6,NullResult), - ("occurence_indicator",6,7,NullResult), - ], 7) - ) - def testElementToken4( self ): - self.doBasicTest( - "element_token", - '- hello ?', - (1, [ - ("negpos_indicator",0,1,NullResult), - ("name",2,7,NullResult), - ("occurence_indicator",8,9,NullResult), - ], 9) - ) - def testElementToken5( self ): - self.doBasicTest( - "element_token", - '+ hello ?', - (1, [ - ("negpos_indicator",0,1,NullResult), - ("name",2,7,NullResult), - ("occurence_indicator",8,9,NullResult), - ], 9) - ) - def testElementToken6( self ): - """Lookahead indicator with positive""" - self.doBasicTest( - "element_token", - '? + hello ?', - (1, [ - ("lookahead_indicator",0,1,NullResult), - ("negpos_indicator",2,3,NullResult), - ("name",4,9,NullResult), - ("occurence_indicator",10,11,NullResult), - ], 11) - ) - def testElementToken7( self ): - """Lookahead indicator with negative""" - self.doBasicTest( - "element_token", - '? 
- hello ?', - (1, [ - ("lookahead_indicator",0,1,NullResult), - ("negpos_indicator",2,3,NullResult), - ("name",4,9,NullResult), - ("occurence_indicator",10,11,NullResult), - ], 11) - ) - def testElementToken8( self ): - """Lookahead indicator with no neg or pos""" - self.doBasicTest( - "element_token", - '?hello?', - (1, [ - ("lookahead_indicator",0,1,NullResult), - ("name",1,6,NullResult), - ("occurence_indicator",6,7,NullResult), - ], 7) - ) - def testElementToken8( self ): - """Error on fail indicator""" - self.doBasicTest( - "element_token", - 'hello+!', - (1, [ - ("name",0,5,NullResult), - ("occurence_indicator",5,6,NullResult), - ("error_on_fail",6,7,NullResult), - ], 7) - ) - def testElementToken9( self ): - """Error on fail indicator with message""" - self.doBasicTest( - "element_token", - 'hello+! "Unable to complete parse, yikes!"', - (1, [ - ("name",0,5,NullResult), - ("occurence_indicator",5,6,NullResult), - ("error_on_fail",6,42,[ - ("literal",8,42,[ - ("CHARNODBLQUOTE",9,41,NullResult), - ]), - ]), - ], 42) - ) - def testCutToken2( self ): - self.doBasicTest( - "element_token", - '(!,a)', - (1, [ - ('seq_group', 1,4, [ - ("error_on_fail",1,2,NullResult), - ('element_token',3,4,[ - ("name",3,4,NullResult), - ]), - ]), - ], 5) - ) - def testCutToken3( self ): - self.doBasicTest( - "element_token", - '(a,!"this")', - (1, [ - ('seq_group', 1,10, [ - ('element_token',1,2,[ - ("name",1,2,NullResult), - ]), - ("error_on_fail",3,10,[ - ("literal",4,10,[ - ("CHARNODBLQUOTE",5,9,NullResult), - ]), - ]), - ]), - ], 11) - ) - def testCutToken4( self ): - self.doBasicTest( - "element_token", - '(a,!"this",b)', - (1, [ - ('seq_group', 1,12, [ - ('element_token',1,2,[ - ("name",1,2,NullResult), - ]), - ("error_on_fail",3,10,[ - ("literal",4,10,[ - ("CHARNODBLQUOTE",5,9,NullResult), - ]), - ]), - ('element_token',11,12,[ - ("name",11,12,NullResult), - ]), - ]), - ], 13) - ) - def testDeclaration( self ): - self.doBasicTest( - "declaration", - 'a := "a"', - (1, [ - ("name",0,1,NullResult), - ('seq_group',4,8,[ - ('element_token', 5,8,[ - ("literal",5,8,[ - ('CHARNODBLQUOTE', 6,7,NullResult), - ]), - ]), - ]), - ], 8) - ) - def testDeclaration2( self ): - self.doBasicTest( - "declaration", - 'a := b', - (1, [ - ("name",0,1,NullResult), - ('seq_group',4,6,[ - ('element_token', 5,6,[ - ("name",5,6,NullResult), - ]) - ]), - ], 6) - ) - def testDeclaration3( self ): - self.doBasicTest( - "declaration", - 'a := ', - (0,[],AnyInt) - ) - def testDeclaration4( self ): - self.doBasicTest( - "declaration", - ' := b', - (1, [ - ("unreportedname",0,3,[ - ("name",1,2,NullResult), - ]), - ('seq_group',6,8,[ - ('element_token', 7,8,[ - ("name",7,8,NullResult), - ]), - ]) - ], 8) - ) - def testDeclaration5( self ): - self.doBasicTest( - "declaration", - '>a< := b', - (1, [ - ("expandedname",0,3,[ - ("name",1,2,NullResult), - ]), - ('seq_group',6,8,[ - ('element_token', 7,8,[ - ("name",7,8,NullResult), - ]) - ]), - ], 8) - ) - def testDeclarationSet1( self ): - self.doBasicTest( - "declarationset", - 'a := b #hello\012b:="c"', - (1, [ - ('declaration', 0,15,[ - ("name",0,1,NullResult), - ('seq_group',4,15,[ - ('element_token', 5,15,[ - ("name",5,6,NullResult), - ]) - ]) - ]), - ('declaration', 15,21,[ - ("name",15,16,NullResult), - ('seq_group',18,21,[ - ('element_token', 18,21,[ - ("literal",18,21,[ - ('CHARNODBLQUOTE', 19,20,NullResult), - ]), - ]), - ]), - ]), - ], 21) - ) - def testDeclarationSet2( self ): - '''Just tries to parse and sees that everything was parsed, doesn't predict the result''' - parser 
= SPGenerator.buildParser( "declarationset" ) - result = TextTools.tag( declaration, parser ) - assert result[-1] == len(declaration), '''Didn't complete parse of the simpleparse declaration, only got %s chars, should have %s'''%(result[-1], len(declaration)) + """Test parsing of the the simpleparse grammar elements""" + def doBasicTest(self, parserName, testValue, expected, ): + parser = SPGenerator.buildParser( parserName ) + result = TextTools.tag( testValue, parser ) + assert result == expected, '''\nexpected:%s\n got:%s\n'''%( pprint.pformat(expected), pprint.pformat(result)) + def testChar1( self ): + self.doBasicTest( + "CHARNODBLQUOTE", + 'test\\""', + (1, [], 4), + ) + def testChar2( self ): + self.doBasicTest( + "ESCAPEDCHAR", + '\\n"', + (1, [('SPECIALESCAPEDCHAR', 1, 2, NullResult)], 2), + ) + def testChar3( self ): + self.doBasicTest( + "ESCAPEDCHAR", + '\\007"', + (1, [('OCTALESCAPEDCHAR', 1, 4, NullResult)], 4), + ) + def testChar4( self ): + testValue = '\\""' + self.doBasicTest( + "CHARNODBLQUOTE", + testValue, + (0, [], AnyInt), + ) + def testChar5( self ): + self.doBasicTest( + "CHARNODBLQUOTE", + 'ehllo\\""', + (1, [], 5), + ) + def testChar6( self ): + self.doBasicTest( + "CHARNODBLQUOTE", + '007', + (1, [], 3), + ) + def testChar7( self ): + self.doBasicTest( + "ESCAPEDCHAR", + '\\"', + (1, [('SPECIALESCAPEDCHAR', 1, 2, NullResult)], 2), + ) + def testChar8( self ): + self.doBasicTest( + "ESCAPEDCHAR", + '\\"', + (1, [('SPECIALESCAPEDCHAR', 1, 2, NullResult)], 2), + ) + def testChar9( self ): + self.doBasicTest( + "ESCAPEDCHAR", + '\\x10', + (1, [('HEXESCAPEDCHAR', 2, 4, NullResult)], 4), + ) + def testChar85( self ): + self.doBasicTest( + "HEXESCAPEDCHAR", + '10', + (1, [], 2), + ) + def testCharNoBrace1( self ): + self.doBasicTest( + "CHARNOBRACE", + 'a-z', + (1, [('CHAR', 0, 1, NullResult)], 1), + ) + def testCharRange1( self ): + self.doBasicTest( + "CHARRANGE", + 'a-z', + (1, [('CHARNOBRACE', 0, 1, [('CHAR', 0, 1, NullResult)]),('CHARNOBRACE', 2, 3, [('CHAR', 2, 3, NullResult)])], 3), + ) + def testRange1( self ): + self.doBasicTest( + "range", + '[a-zA-Z]', + (1, [ + ('CHARRANGE',1,4,[ + ('CHARNOBRACE', 1, 2, [('CHAR', 1, 2, NullResult)]), + ('CHARNOBRACE', 3, 4, [('CHAR', 3, 4, NullResult)]), + ]), + ('CHARRANGE',4,7,[ + ('CHARNOBRACE', 4, 5, [('CHAR', 4, 5, NullResult)]), + ('CHARNOBRACE', 6, 7, [('CHAR', 6, 7, NullResult)]), + ]), + ], 8) + ) + def testRange2( self ): + self.doBasicTest( + "range", + '[-a-zA-Z]', + (1, [ + ('CHARDASH', 1, 2, NullResult), + ('CHARRANGE',2,5,[ + ('CHARNOBRACE', 2, 3, [('CHAR', 2, 3, NullResult)]), + ('CHARNOBRACE', 4, 5, [('CHAR', 4, 5, NullResult)]), + ]), + ('CHARRANGE',5,8,[ + ('CHARNOBRACE', 5, 6, [('CHAR', 5, 6, NullResult)]), + ('CHARNOBRACE', 7, 8, [('CHAR', 7, 8, NullResult)]), + ]), + ], 9), + ) + def testRange3( self ): + self.doBasicTest( + "range", + '[]a-zA-Z]', + (1, [ + ('CHARBRACE', 1, 2, NullResult), + ('CHARRANGE',2,5,[ + ('CHARNOBRACE', 2, 3, [('CHAR', 2, 3, NullResult)]), + ('CHARNOBRACE', 4, 5, [('CHAR', 4, 5, NullResult)]), + ]), + ('CHARRANGE',5,8,[ + ('CHARNOBRACE', 5, 6, [('CHAR', 5, 6, NullResult)]), + ('CHARNOBRACE', 7, 8, [('CHAR', 7, 8, NullResult)]), + ]), + ], 9), + ) + + def testRange4( self ): + """Test optional repeating children running into eof + + Original SimpleParse had a major failure here, + system hung trying to parse the [] string. Basically, + there was no check for EOF during a repeating-item + parse (save for literals and character sets), so you + wound up with infinite loops. 
+ """ + self.doBasicTest( + "range", + '[]', + (0, [], AnyInt), + ) + def testRange5( self ): + """Test optional repeating children with no termination + + Original SimpleParse had a major failure here, + system hung trying to parse the [] string. Basically, + there was no check for EOF during a repeating-item + parse (save for literals and character sets), so you + wound up with infinite loops. + """ + self.doBasicTest( + "range", + '[] ', + (0, [], AnyInt), + ) + + def testLiteral1( self ): + self.doBasicTest( + "literal", + '"test"', + (1, [('CHARNODBLQUOTE', 1, 5, NullResult)], 6), + ) + def testLiteral2( self ): + self.doBasicTest( + "literal", + '"test\\""', + (1, [ + ('CHARNODBLQUOTE', 1, 5, NullResult), + ('ESCAPEDCHAR', 5, 7, [ + ('SPECIALESCAPEDCHAR', 6, 7, NullResult) + ]) + ], 8) + + ) + def testLiteral3( self ): + self.doBasicTest( + "literal", + '""', + (1, [], 2), + ) + def testLiteral4( self ): + self.doBasicTest( + "literal", + '"\'"', + (1, [('CHARNODBLQUOTE', 1, 2, NullResult),], 3), + ) + def testLiteral5( self ): + self.doBasicTest( + "literal", + '"\\"test"', + (1, [ + ('ESCAPEDCHAR', 1, 3, [ + ('SPECIALESCAPEDCHAR', 2, 3, NullResult) + ]), + ('CHARNODBLQUOTE', 3, 7, NullResult) + ], 8) + ) + def testLiteral6( self ): + self.doBasicTest( + "literal", + '"test\\023""', + (1, [ + ('CHARNODBLQUOTE', 1, 5, NullResult), + ('ESCAPEDCHAR', 5, 9, [ + ('OCTALESCAPEDCHAR', 6, 9, NullResult) + ]) + ], 10) + + ) + def testLiteralDecorator( self ): + self.doBasicTest( + "literalDecorator", + 'c', + (1, [], 1), + ) + def testLiteralDecorator2( self ): + self.doBasicTest( + "literal", + 'c"this"', + (1, [('literalDecorator',0,1,NullResult),('CHARNODBLQUOTE',2,6,NullResult)], 7), + ) + def testLiteralDecorator3( self ): + """Decorator must be right next to literal, no whitespace""" + self.doBasicTest( + "literal", + 'c "this"', + (0, [], AnyInt), + ) + + def testWhitespace1( self ): + self.doBasicTest( + "ts", + ' \t', + (1, [], 3) + ) + def testWhitespace2( self ): + self.doBasicTest( + "ts", + ' \t\n', + (1, [], 4) + ) + def testWhitespace3( self ): + self.doBasicTest( + "ts", + ' \t#testing\r\n', + (1, [('comment', 3, 13, NullResult)], 13) + ) + def testWhitespace4( self ): + self.doBasicTest( + "ts", + 'nospace', + (1, [], 0) + ) + def testWhitespace5( self ): + """Bug in 2.0.0 where Null comments such as: + "#\n" + + didn't parse. 
+ """ + self.doBasicTest( + "ts", + ' #\n ', + (1, [('comment',1,3,NullResult)], 4) + ) + + def testName1( self ): + self.doBasicTest( + "name", + 'abcdefg', + (1, [], 7) + ) + def testName2( self ): + self.doBasicTest( + "name", + '2abcdefg', + (0, [], AnyInt) + ) + def testName3( self ): + self.doBasicTest( + "name", + '_abcdefg_-', + (1, [], 9) + ) + + def testUnreportedName1( self ): + self.doBasicTest( + "unreportedname", + '', + (1, [('name',1,8,NullResult)], 9) + ) + def testUnreportedName2( self ): + self.doBasicTest( + "unreportedname", + '<>', + (0, [], AnyInt) + ) + def testExpandedName1( self ): + self.doBasicTest( + "expandedname", + '>abcdefg<', + (1, [('name',1,8,NullResult)], 9) + ) + def testExpandedName2( self ): + self.doBasicTest( + "expandedname", + '><', + (0, [], AnyInt) + ) + def testComment1( self ): + self.doBasicTest( + "comment", + '>', + (0, [], AnyInt) + ) + def testComment2( self ): + self.doBasicTest( + "comment", + '#testing\n', + (1, [], 9) + ) + def testOccurenceIndicator1( self ): + self.doBasicTest( + "occurence_indicator", + '*', + (1, [], 1) + ) + def testOccurenceIndicator2( self ): + self.doBasicTest( + "occurence_indicator", + '+', + (1, [], 1) + ) + def testOccurenceIndicator3( self ): + self.doBasicTest( + "occurence_indicator", + '?', + (1, [], 1) + ) + def testOccurenceIndicator4( self ): + self.doBasicTest( + "occurence_indicator", + 'hello', + (0, [], AnyInt) + ) + def testOccurenceIndicator5( self ): + self.doBasicTest( + "occurence_indicator", + '', + (0, [], AnyInt) + ) + + def testLookAheadIndicator1( self ): + self.doBasicTest( + "lookahead_indicator", + '?', + (1, [], 1) + ) + def testLookAheadIndicator2( self ): + self.doBasicTest( + "lookahead_indicator", + '', + (0, [], AnyInt) + ) + + def testNegposIndicator1( self ): + self.doBasicTest( + "negpos_indicator", + '-', + (1, [], 1) + ) + def testNegposIndicator2( self ): + self.doBasicTest( + "negpos_indicator", + '+', + (1, [], 1) + ) + def testNegposIndicator3( self ): + self.doBasicTest( + "negpos_indicator", + ')', + (0, [], AnyInt) + ) + def testErrorOnFailFlag1( self ): + self.doBasicTest( + "error_on_fail", + '!', + (1, [], 1) + ) + def testFOGroup1( self ): + self.doBasicTest( + "fo_group", + 'a/b', + (1, [ + ('element_token', 0,1,[ + ("name",0,1,NullResult), + ]), + ('element_token', 2,3,[ + ("name",2,3,NullResult), + ]), + ], 3) + ) + def testSEQToken1( self ): + self.doBasicTest( + "seq_group", + 'a,b', + (1, [ + ('element_token', 0,1,[ + ("name",0,1,NullResult), + ]), + ('element_token', 2,3,[ + ("name",2,3,NullResult), + ]), + ], 3) + ) + def testSEQGroup1( self ): + self.doBasicTest( + "seq_group", + 'a,#c\012b', + (1, [ + ('element_token', 0,1,[ + ("name",0,1,NullResult), + ]), + ('element_token', 5,6,[ + ("name",5,6,NullResult), + ]), + ], 6) + ) + def testSeqGroup2( self ): + self.doBasicTest( + "seq_group", + 'ts, (unreportedname/expandedname/name)', + (1, [ + ('element_token', 0,2,[ + ("name",0,2,NullResult), + ]), + ('element_token', 4,38,[ + ('seq_group',5,37,[ + ('fo_group',5,37,[ + ('element_token', 5,19,[ + ("name",5,19,NullResult), + ]), + ('element_token', 20,32,[ + ("name",20,32,NullResult), + ]), + ('element_token', 33,37,[ + ("name",33,37,NullResult), + ]), + ]), + ]), + ]), + ], 38) + ) + def testSeqGroup3( self ): + self.doBasicTest( + "seq_group", + '(a/b/c)', + (1, [ + ('element_token',0,7,[ + ('seq_group',1,6,[ + ('fo_group',1,6,[ + ('element_token', 1,2,[ + ("name",1,2,NullResult), + ]), + ('element_token', 3,4,[ + ("name",3,4,NullResult), + ]), + 
('element_token', 5,6,[ + ("name",5,6,NullResult), + ]), + ]), + ]), + ]), + ], 7) + ) + def testGroup1( self ): + self.doBasicTest( + "group", + '()', + (0, [], AnyInt) + ) + def testGroup2( self ): + self.doBasicTest( + "group", + '(hello)', + (1, [ + ('seq_group',1,6,[ + ('element_token', 1,6,[ + ("name",1,6,NullResult), + ]), + ]), + ], 7) + ) + def testGroup3( self ): + '''Test group with sequential added group + Note that this test also serves to test + the function of non-reporting names''' + self.doBasicTest( + "group", + '(hello, there)', + (1, [ + ('seq_group', 1,13,[ + ('element_token', 1,6,[ + ("name",1,6,NullResult), + ]), + ('element_token', 8,13,[ + ("name",8,13,NullResult), + ]), + ]), + ], 14) + ) + def testGroup4( self ): + '''Test group with sequential added group + Note that this test also serves to test + the function of non-reporting names''' + self.doBasicTest( + "group", + '(hello/there)', + (1, [ + ('seq_group',1,12,[ + ('fo_group',1,12,[ + ('element_token', 1,6,[ + ("name",1,6,NullResult), + ]), + ('element_token', 7,12,[ + ("name",7,12,NullResult), + ]), + ]), + ]), + ], 13) + ) + def testGroup5( self ): + '''Test group with sequential added group + Note that this test also serves to test + the function of non-reporting names''' + self.doBasicTest( + "group", + '([the]/"and")', + (1, [ + ('seq_group',1,12,[ + ('fo_group',1,12,[ + ('element_token', 1,6,[ + ("range",1,6,[ + ('CHARNOBRACE', 2,3,[ # this should really be a collapsed level + ('CHAR', 2,3,NullResult), + ]), + ('CHARNOBRACE', 3,4,[ # this should really be a collapsed level + ('CHAR', 3,4,NullResult), + ]), + ('CHARNOBRACE', 4,5,[ # this should really be a collapsed level + ('CHAR', 4,5,NullResult), + ]), + ]), + ]), + ('element_token', 7,12,[ + ("literal",7,12,[ + ('CHARNODBLQUOTE', 8,11,NullResult), + ]), + ]), + ]), + ]), + ], 13) + ) + def testGroup6( self ): + '''Test group with multiple / 'd values''' + self.doBasicTest( + "group", + '(hello/there/a)', + (1, [ + ('seq_group',1,14,[ + ('fo_group',1,14,[ + ('element_token', 1,6,[ + ("name",1,6,NullResult), + ]), + ('element_token', 7,12,[ + ("name",7,12,NullResult), + ]), + ('element_token', 13,14,[ + ("name",13,14,NullResult), + ]), + ]), + ]), + ], 15) + ) + def testElementToken1( self ): + self.doBasicTest( + "element_token", + 'hello', + (1, [ + ("name",0,5,NullResult), + ], 5) + ) + def testElementToken2( self ): + self.doBasicTest( + "element_token", + '-hello', + (1, [ + ("negpos_indicator",0,1,NullResult), + ("name",1,6,NullResult), + ], 6) + ) + def testElementToken3( self ): + self.doBasicTest( + "element_token", + '-hello?', + (1, [ + ("negpos_indicator",0,1,NullResult), + ("name",1,6,NullResult), + ("occurence_indicator",6,7,NullResult), + ], 7) + ) + def testElementToken4( self ): + self.doBasicTest( + "element_token", + '- hello ?', + (1, [ + ("negpos_indicator",0,1,NullResult), + ("name",2,7,NullResult), + ("occurence_indicator",8,9,NullResult), + ], 9) + ) + def testElementToken5( self ): + self.doBasicTest( + "element_token", + '+ hello ?', + (1, [ + ("negpos_indicator",0,1,NullResult), + ("name",2,7,NullResult), + ("occurence_indicator",8,9,NullResult), + ], 9) + ) + def testElementToken6( self ): + """Lookahead indicator with positive""" + self.doBasicTest( + "element_token", + '? 
+ hello ?', + (1, [ + ("lookahead_indicator",0,1,NullResult), + ("negpos_indicator",2,3,NullResult), + ("name",4,9,NullResult), + ("occurence_indicator",10,11,NullResult), + ], 11) + ) + def testElementToken7( self ): + """Lookahead indicator with negative""" + self.doBasicTest( + "element_token", + '? - hello ?', + (1, [ + ("lookahead_indicator",0,1,NullResult), + ("negpos_indicator",2,3,NullResult), + ("name",4,9,NullResult), + ("occurence_indicator",10,11,NullResult), + ], 11) + ) + def testElementToken8( self ): + """Lookahead indicator with no neg or pos""" + self.doBasicTest( + "element_token", + '?hello?', + (1, [ + ("lookahead_indicator",0,1,NullResult), + ("name",1,6,NullResult), + ("occurence_indicator",6,7,NullResult), + ], 7) + ) + def testElementToken9( self ): + """Error on fail indicator""" + self.doBasicTest( + "element_token", + 'hello+!', + (1, [ + ("name",0,5,NullResult), + ("occurence_indicator",5,6,NullResult), + ("error_on_fail",6,7,NullResult), + ], 7) + ) + def testElementToken10( self ): + """Error on fail indicator with message""" + self.doBasicTest( + "element_token", + 'hello+! "Unable to complete parse, yikes!"', + (1, [ + ("name",0,5,NullResult), + ("occurence_indicator",5,6,NullResult), + ("error_on_fail",6,42,[ + ("literal",8,42,[ + ("CHARNODBLQUOTE",9,41,NullResult), + ]), + ]), + ], 42) + ) + def testCutToken2( self ): + self.doBasicTest( + "element_token", + '(!,a)', + (1, [ + ('seq_group', 1,4, [ + ("error_on_fail",1,2,NullResult), + ('element_token',3,4,[ + ("name",3,4,NullResult), + ]), + ]), + ], 5) + ) + def testCutToken3( self ): + self.doBasicTest( + "element_token", + '(a,!"this")', + (1, [ + ('seq_group', 1,10, [ + ('element_token',1,2,[ + ("name",1,2,NullResult), + ]), + ("error_on_fail",3,10,[ + ("literal",4,10,[ + ("CHARNODBLQUOTE",5,9,NullResult), + ]), + ]), + ]), + ], 11) + ) + def testCutToken4( self ): + self.doBasicTest( + "element_token", + '(a,!"this",b)', + (1, [ + ('seq_group', 1,12, [ + ('element_token',1,2,[ + ("name",1,2,NullResult), + ]), + ("error_on_fail",3,10,[ + ("literal",4,10,[ + ("CHARNODBLQUOTE",5,9,NullResult), + ]), + ]), + ('element_token',11,12,[ + ("name",11,12,NullResult), + ]), + ]), + ], 13) + ) + def testDeclaration( self ): + self.doBasicTest( + "declaration", + 'a := "a"', + (1, [ + ("name",0,1,NullResult), + ('seq_group',4,8,[ + ('element_token', 5,8,[ + ("literal",5,8,[ + ('CHARNODBLQUOTE', 6,7,NullResult), + ]), + ]), + ]), + ], 8) + ) + def testDeclaration2( self ): + self.doBasicTest( + "declaration", + 'a := b', + (1, [ + ("name",0,1,NullResult), + ('seq_group',4,6,[ + ('element_token', 5,6,[ + ("name",5,6,NullResult), + ]) + ]), + ], 6) + ) + def testDeclaration3( self ): + self.doBasicTest( + "declaration", + 'a := ', + (0,[],AnyInt) + ) + def testDeclaration4( self ): + self.doBasicTest( + "declaration", + ' := b', + (1, [ + ("unreportedname",0,3,[ + ("name",1,2,NullResult), + ]), + ('seq_group',6,8,[ + ('element_token', 7,8,[ + ("name",7,8,NullResult), + ]), + ]) + ], 8) + ) + def testDeclaration5( self ): + self.doBasicTest( + "declaration", + '>a< := b', + (1, [ + ("expandedname",0,3,[ + ("name",1,2,NullResult), + ]), + ('seq_group',6,8,[ + ('element_token', 7,8,[ + ("name",7,8,NullResult), + ]) + ]), + ], 8) + ) + def testDeclarationSet1( self ): + self.doBasicTest( + "declarationset", + 'a := b #hello\012b:="c"', + (1, [ + ('declaration', 0,15,[ + ("name",0,1,NullResult), + ('seq_group',4,15,[ + ('element_token', 5,15,[ + ("name",5,6,NullResult), + ]) + ]) + ]), + ('declaration', 15,21,[ + 
("name",15,16,NullResult), + ('seq_group',18,21,[ + ('element_token', 18,21,[ + ("literal",18,21,[ + ('CHARNODBLQUOTE', 19,20,NullResult), + ]), + ]), + ]), + ]), + ], 21) + ) + def testDeclarationSet2( self ): + '''Just tries to parse and sees that everything was parsed, doesn't predict the result''' + parser = SPGenerator.buildParser( "declarationset" ) + result = TextTools.tag( declaration, parser ) + assert result[-1] == len(declaration), '''Didn't complete parse of the simpleparse declaration, only got %s chars, should have %s'''%(result[-1], len(declaration)) recursiveParser = Parser(declaration) class SimpleParseRecursiveTests(SimpleParseGrammarTests): - """Test parsing of grammar elements with generated version of simpleparse grammar""" - def doBasicTest(self, parserName, testValue, expected, ): - result = recursiveParser.parse( testValue, production=parserName ) - assert result == expected, '''\nexpected:%s\n got:%s\n'''%( expected, result ) + """Test parsing of grammar elements with generated version of simpleparse grammar""" + def doBasicTest(self, parserName, testValue, expected, ): + result = recursiveParser.parse( testValue, production=parserName ) + assert result == expected, '''\nexpected:%s\n got:%s\n'''%( expected, result ) def getSuite(): - return unittest.TestSuite(( - unittest.makeSuite(SimpleParseGrammarTests,'test'), - unittest.makeSuite(SimpleParseRecursiveTests,'test'), - )) + return unittest.TestSuite(( + unittest.makeSuite(SimpleParseGrammarTests,'test'), + unittest.makeSuite(SimpleParseRecursiveTests,'test'), + )) if __name__ == "__main__": - unittest.main(defaultTest="getSuite") + unittest.main(defaultTest="getSuite") diff -Nru simpleparse-2.1.0a1/tests/test_objectgenerator.py simpleparse-2.2.0/tests/test_objectgenerator.py --- simpleparse-2.1.0a1/tests/test_objectgenerator.py 2006-02-18 23:15:40.000000000 +0000 +++ simpleparse-2.2.0/tests/test_objectgenerator.py 2015-11-11 19:01:43.000000000 +0000 @@ -1,312 +1,323 @@ -import unittest, pprint, traceback +import unittest from simpleparse.objectgenerator import * -from simpleparse.stt.TextTools import TextTools -from genericvalues import NullResult, AnyInt +from .genericvalues import AnyInt + +try: + _unichr = unichr +except NameError: + _unichr = chr class ElementTokenTests(unittest.TestCase): - def doBasicTest(self, instance, testvalue, expected, startPosition=0 ): - table = tuple(instance.toParser()) - result = tag( testvalue, table , startPosition) - assert result == expected, '''\n\texpected:%s\n\tgot:%s\n'''%( expected, result ) - def testString1( self ): - self.doBasicTest( - Literal( value = 'test' ), - 'test', - (1, [],4), - ) - def testString2( self ): - self.doBasicTest( - Literal( value = 'test', optional =1 ), - 'test', - (1, [],4), - ) - def testString3( self ): - self.doBasicTest( - Literal( value = 'test', optional =1, negative=1 ), - 'test', - (1, [],0), - ) - def testString4( self ): - self.doBasicTest( - Literal( value = 'test', negative=1 ), - 'test', - (0, [],AnyInt), - ) - def testString5( self ): - self.doBasicTest( - Literal( value = 'test', repeating=1), - 'testtest', - (1, [],8), - ) - def testString6( self ): - self.doBasicTest( - Literal( value = 'test', repeating=1, optional = 1), - 'testtest', - (1, [],8), - ) - def testString7( self ): - self.doBasicTest( - Literal( value = 'test', repeating=1, optional = 1, negative = 1), - 'testtest', - (1, [],0), - ) - def testString8( self ): - """Test repeating negative string""" - self.doBasicTest( - Literal( value = 'test', repeating=1, 
negative = 1), - 'testtest', - (0, [],AnyInt), - ) - def testString9( self ): - self.doBasicTest( - Literal( value = '\\',), - '\\', - (1, [],1), - ) - def testRange1( self ): - self.doBasicTest( - Range( value = 'abc'), - 'aabbcc', - (1, [],1), - ) - def testRange2( self ): - self.doBasicTest( - Range( value = 'abc', optional=1), - 'aabbcc', - (1, [],1), - ) - def testRange3( self ): - self.doBasicTest( - Range( value = 'abc', optional=1, repeating=1), - 'aabbcc', - (1, [],6), - ) - def testRange4( self ): - self.doBasicTest( - Range( value = 'abc', optional=1, repeating=1, negative=1), - 'aabbcc', - (1, [],0), - ) - def testRange5( self ): - self.doBasicTest( - Range( value = 'abc', optional=1, negative=1), - 'aabbcc', - (1, [],0), - ) - def testRange6( self ): - self.doBasicTest( - Range( value = 'abc', negative=1), - 'aabbcc', - (0, [],AnyInt), - ) - def testRange7( self ): - self.doBasicTest( - Range( value = 'abc', negative=1, repeating=1), - 'aabbcc', - (0, [],AnyInt), - ) - def testRange8( self ): - self.doBasicTest( - Range( value = 'abc', negative=1, repeating=1), - 'defc', - (1, [],3), - ) - def testRange9( self ): - self.doBasicTest( - Range( value = 'abc', negative=1), - 'defc', - (1, [],1), - ) - def testSequential1( self ): - self.doBasicTest( - SequentialGroup( - children = [ - Range( value = 'abc',), - Literal( value = 'test', ), - ], - negative=0, - ), - 'atest', - (1, [],5), - ) - def testSequential2( self ): - self.doBasicTest( - SequentialGroup( - children = [ - Range( value = 'abc',), - Literal( value = 'test', ), - ], - negative=1, - ), - 'atest', - (0, [],AnyInt), - ) - def testSequential3( self ): - self.doBasicTest( - SequentialGroup( - children = [ - Range( value = 'abc',), - Literal( value = 'test', ), - ], - negative=1, optional=1, - ), - 'atest', - (1, [],0), - ) - def testSequential4( self ): - self.doBasicTest( - SequentialGroup( - children = [ - Range( value = 'abc',), - Literal( value = 'test', ), - ], - negative=1, optional=1, repeating=1, - ), - 'sdatest', - (1, [],2), - ) - def testSequential5( self ): - self.doBasicTest( - SequentialGroup( - children = [ - Range( value = 'abc',), - Literal( value = 'test', ), - ], - optional=1, repeating=1, - ), - 'atestbtestctest', - (1, [],15), - ) - def testSequential6( self ): - self.doBasicTest( - SequentialGroup( - children = [ - Range( value = 'abc',), - Literal( value = 'test', ), - ], - optional=1, - ), - 'atestbtestctest', - (1, [],5), - ) - - def testSequential7( self ): - self.doBasicTest( - SequentialGroup( - children = [ - Range( value = 'abc',), - Literal( value = 'test', ), - ], - optional=1, - ), - 'satestbtestctest', - (1, [],0), - ) + def doBasicTest(self, instance, testvalue, expected, startPosition=0 ): + table = tuple(instance.toParser()) + result = tag( testvalue, table , startPosition) + assert result == expected, '''\n\texpected:%s\n\tgot:%s\n'''%( expected, result ) + def testString1( self ): + self.doBasicTest( + Literal( value = 'test' ), + 'test', + (1, [],4), + ) + def testString2( self ): + self.doBasicTest( + Literal( value = 'test', optional =1 ), + 'test', + (1, [],4), + ) + def testString3( self ): + self.doBasicTest( + Literal( value = 'test', optional =1, negative=1 ), + 'test', + (1, [],0), + ) + def testString4( self ): + self.doBasicTest( + Literal( value = 'test', negative=1 ), + 'test', + (0, [],AnyInt), + ) + def testString5( self ): + self.doBasicTest( + Literal( value = 'test', repeating=1), + 'testtest', + (1, [],8), + ) + def testString6( self ): + self.doBasicTest( + 
Literal( value = 'test', repeating=1, optional = 1), + 'testtest', + (1, [],8), + ) + def testString7( self ): + self.doBasicTest( + Literal( value = 'test', repeating=1, optional = 1, negative = 1), + 'testtest', + (1, [],0), + ) + def testString8( self ): + """Test repeating negative string""" + self.doBasicTest( + Literal( value = 'test', repeating=1, negative = 1), + 'testtest', + (0, [],AnyInt), + ) + def testString9( self ): + self.doBasicTest( + Literal( value = '\\',), + '\\', + (1, [],1), + ) + def testRange1( self ): + self.doBasicTest( + Range( value = 'abc'), + 'aabbcc', + (1, [],1), + ) + def testRange2( self ): + self.doBasicTest( + Range( value = 'abc', optional=1), + 'aabbcc', + (1, [],1), + ) + def testRange3( self ): + self.doBasicTest( + Range( value = 'abc', optional=1, repeating=1), + 'aabbcc', + (1, [],6), + ) + def testRange4( self ): + self.doBasicTest( + Range( value = 'abc', optional=1, repeating=1, negative=1), + 'aabbcc', + (1, [],0), + ) + def testRange5( self ): + self.doBasicTest( + Range( value = 'abc', optional=1, negative=1), + 'aabbcc', + (1, [],0), + ) + def testRange6( self ): + self.doBasicTest( + Range( value = 'abc', negative=1), + 'aabbcc', + (0, [],AnyInt), + ) + def testRange7( self ): + self.doBasicTest( + Range( value = 'abc', negative=1, repeating=1), + 'aabbcc', + (0, [],AnyInt), + ) + def testRange8( self ): + self.doBasicTest( + Range( value = 'abc', negative=1, repeating=1), + 'defc', + (1, [],3), + ) + def testRange9( self ): + self.doBasicTest( + Range( value = 'abc', negative=1), + 'defc', + (1, [],1), + ) + def testUnicodeRange10( self ): + urange = Range( value = u''.join([_unichr(x) for x in range( 0x600, 0x6FF+1 )]), repeating=True ) + self.doBasicTest( + urange, + u'\u0600\u06FF', + (1,[],2), + ) + def testSequential1( self ): + self.doBasicTest( + SequentialGroup( + children = [ + Range( value = 'abc',), + Literal( value = 'test', ), + ], + negative=0, + ), + 'atest', + (1, [],5), + ) + def testSequential2( self ): + self.doBasicTest( + SequentialGroup( + children = [ + Range( value = 'abc',), + Literal( value = 'test', ), + ], + negative=1, + ), + 'atest', + (0, [],AnyInt), + ) + def testSequential3( self ): + self.doBasicTest( + SequentialGroup( + children = [ + Range( value = 'abc',), + Literal( value = 'test', ), + ], + negative=1, optional=1, + ), + 'atest', + (1, [],0), + ) + def testSequential4( self ): + self.doBasicTest( + SequentialGroup( + children = [ + Range( value = 'abc',), + Literal( value = 'test', ), + ], + negative=1, optional=1, repeating=1, + ), + 'sdatest', + (1, [],2), + ) + def testSequential5( self ): + self.doBasicTest( + SequentialGroup( + children = [ + Range( value = 'abc',), + Literal( value = 'test', ), + ], + optional=1, repeating=1, + ), + 'atestbtestctest', + (1, [],15), + ) + def testSequential6( self ): + self.doBasicTest( + SequentialGroup( + children = [ + Range( value = 'abc',), + Literal( value = 'test', ), + ], + optional=1, + ), + 'atestbtestctest', + (1, [],5), + ) + + def testSequential7( self ): + self.doBasicTest( + SequentialGroup( + children = [ + Range( value = 'abc',), + Literal( value = 'test', ), + ], + optional=1, + ), + 'satestbtestctest', + (1, [],0), + ) - def testFirstOf1( self ): - self.doBasicTest( - FirstOfGroup( - children = [ - Range( value = 'abc',), - Literal( value = 'test', ), - ], - negative=0, - ), - 'atest', - (1, [],1), - ) - def testFirstOf2( self ): - self.doBasicTest( - FirstOfGroup( - children = [ - Range( value = 'abc',), - Literal( value = 'test', ), - ], - 
negative=0, - ), - 'testa', - (1, [],4), - ) - def testFirstOf3( self ): - self.doBasicTest( - FirstOfGroup( - children = [ - Range( value = 'abc',), - Literal( value = 'test', ), - ], - negative=1, - ), - 'testa', - (0, [],AnyInt), - ) - def testFirstOf4( self ): - self.doBasicTest( - FirstOfGroup( - children = [ - Range( value = 'abc',), - Literal( value = 'test', ), - ], - negative=1, optional=1, - ), - 'testa', - (1, [],0), - ) - def testFirstOf5( self ): - self.doBasicTest( - FirstOfGroup( - children = [ - Range( value = 'abc',), - Literal( value = 'test', ), - ], - repeating=1, - ), - 'testabtest', - (1, [],10), - ) - def testFirstOf6( self ): - self.doBasicTest( - FirstOfGroup( - children = [ - Range( value = 'abc',), - Literal( value = 'test', ), - ], - repeating=1, negative = 1, - ), - 'hellotheretestabtest', - (1, [],10), - ) + def testFirstOf1( self ): + self.doBasicTest( + FirstOfGroup( + children = [ + Range( value = 'abc',), + Literal( value = 'test', ), + ], + negative=0, + ), + 'atest', + (1, [],1), + ) + def testFirstOf2( self ): + self.doBasicTest( + FirstOfGroup( + children = [ + Range( value = 'abc',), + Literal( value = 'test', ), + ], + negative=0, + ), + 'testa', + (1, [],4), + ) + def testFirstOf3( self ): + self.doBasicTest( + FirstOfGroup( + children = [ + Range( value = 'abc',), + Literal( value = 'test', ), + ], + negative=1, + ), + 'testa', + (0, [],AnyInt), + ) + def testFirstOf4( self ): + self.doBasicTest( + FirstOfGroup( + children = [ + Range( value = 'abc',), + Literal( value = 'test', ), + ], + negative=1, optional=1, + ), + 'testa', + (1, [],0), + ) + def testFirstOf5( self ): + self.doBasicTest( + FirstOfGroup( + children = [ + Range( value = 'abc',), + Literal( value = 'test', ), + ], + repeating=1, + ), + 'testabtest', + (1, [],10), + ) + def testFirstOf6( self ): + self.doBasicTest( + FirstOfGroup( + children = [ + Range( value = 'abc',), + Literal( value = 'test', ), + ], + repeating=1, negative = 1, + ), + 'hellotheretestabtest', + (1, [],10), + ) - def testCIString1( self ): - self.doBasicTest( - CILiteral( value = 'test'), - 'test', - (1, [],4), - ) - def testCIString2( self ): - self.doBasicTest( - CILiteral( value = 'test'), - 'Test', - (1, [],4), - ) - def testCIString3( self ): - self.doBasicTest( - CILiteral( value = 'test'), - 'TEST', - (1, [],4), - ) - def testCIString4( self ): - self.doBasicTest( - CILiteral( value = 'test'), - 'tes', - (0, [],AnyInt), - ) - def testCIString5( self ): - self.doBasicTest( - CILiteral( value = 'test', optional=1), - 'tes', - (1, [], 0), - ) + def testCIString1( self ): + self.doBasicTest( + CILiteral( value = 'test'), + 'test', + (1, [],4), + ) + def testCIString2( self ): + self.doBasicTest( + CILiteral( value = 'test'), + 'Test', + (1, [],4), + ) + def testCIString3( self ): + self.doBasicTest( + CILiteral( value = 'test'), + 'TEST', + (1, [],4), + ) + def testCIString4( self ): + self.doBasicTest( + CILiteral( value = 'test'), + 'tes', + (0, [],AnyInt), + ) + def testCIString5( self ): + self.doBasicTest( + CILiteral( value = 'test', optional=1), + 'tes', + (1, [], 0), + ) ### Simpleparse 2.0.0b4 introduced an explicit check that ## rejects FOGroups with optional children to prevent @@ -357,64 +368,64 @@ ## 'testmnabtest', ## (1, [],12), ## ) - - def testNegative1( self ): - self.doBasicTest( - Literal( value = 's', negative=1), - 's\\', - (0, [],AnyInt), - ) - def testNegative2( self ): - self.doBasicTest( - Literal( value = 's', negative=1), - 'asa\\', - (1, [],1), - ) - def testNegative3( self ): - 
self.doBasicTest( - Literal( value = 's', negative=1, repeating=1), - 'aasa\\', - (1, [],2), - ) - def testNegative4( self ): - self.doBasicTest( - Literal( value = 's', negative=1, repeating=1, optional=1), - 'a', - (1, [],1), - ) - def testNegative4a( self ): - self.doBasicTest( - Literal( value = 's', negative=1, repeating=1, optional=1), - 'as', - (1, [],1), - ) - def testNegative4b( self ): - self.doBasicTest( - Literal( value = 's', negative=1, repeating=1, optional=1), - 'sas', - (1, [],0), - ) - def testNegative5( self ): - self.doBasicTest( - Range( value = 'sat', negative=1), - 'aasat\\', - (0, [],AnyInt), - ) - def testNegative6( self ): - self.doBasicTest( - Range( value = 'sat', negative=1, repeating=1), - 'aasat\\', - (0, [],AnyInt), - ) - def testNegative7( self ): - self.doBasicTest( - Range( value = 'sat', negative=1, repeating=1, optional=1), - 'aasat\\', - (1, [],0), - ) - + + def testNegative1( self ): + self.doBasicTest( + Literal( value = 's', negative=1), + 's\\', + (0, [],AnyInt), + ) + def testNegative2( self ): + self.doBasicTest( + Literal( value = 's', negative=1), + 'asa\\', + (1, [],1), + ) + def testNegative3( self ): + self.doBasicTest( + Literal( value = 's', negative=1, repeating=1), + 'aasa\\', + (1, [],2), + ) + def testNegative4( self ): + self.doBasicTest( + Literal( value = 's', negative=1, repeating=1, optional=1), + 'a', + (1, [],1), + ) + def testNegative4a( self ): + self.doBasicTest( + Literal( value = 's', negative=1, repeating=1, optional=1), + 'as', + (1, [],1), + ) + def testNegative4b( self ): + self.doBasicTest( + Literal( value = 's', negative=1, repeating=1, optional=1), + 'sas', + (1, [],0), + ) + def testNegative5( self ): + self.doBasicTest( + Range( value = 'sat', negative=1), + 'aasat\\', + (0, [],AnyInt), + ) + def testNegative6( self ): + self.doBasicTest( + Range( value = 'sat', negative=1, repeating=1), + 'aasat\\', + (0, [],AnyInt), + ) + def testNegative7( self ): + self.doBasicTest( + Range( value = 'sat', negative=1, repeating=1, optional=1), + 'aasat\\', + (1, [],0), + ) + def getSuite(): - return unittest.makeSuite(ElementTokenTests,'test') + return unittest.makeSuite(ElementTokenTests,'test') if __name__ == "__main__": - unittest.main(defaultTest="getSuite") + unittest.main(defaultTest="getSuite") diff -Nru simpleparse-2.1.0a1/tests/test_optimisation.py simpleparse-2.2.0/tests/test_optimisation.py --- simpleparse-2.1.0a1/tests/test_optimisation.py 2003-06-23 06:23:53.000000000 +0000 +++ simpleparse-2.2.0/tests/test_optimisation.py 2015-11-11 18:42:23.000000000 +0000 @@ -1,76 +1,78 @@ +from __future__ import print_function + import unittest, pprint, traceback from simpleparse.parser import Parser from simpleparse import printers def rcmp( table1, table2 ): - """Silly utility function to get around text search object lack of __cmp__""" - if len(table1) != len(table2): - return 0 - else: - for x,y in map(None, table1, table2): - if not _rcmp( x,y): - return 0 - return 1 + """Silly utility function to get around text search object lack of __cmp__""" + if len(table1) != len(table2): + return 0 + else: + for x,y in zip(table1, table2): + if not _rcmp( x,y): + return 0 + return 1 def _rcmp( item1, item2 ): - if len(item1) != len(item2): - return 0 - if item1[1] in (204,): - if cmp(item1[:2], item2[:2]) != 0: - return 0 - try: - if not rcmp( item1[2][0][item1[2][1]], item2[2][0][item2[2][1]]): - return 0 - except TypeError: - print item1 - print item2 - elif item1[1] == 207: - if cmp(item2[:2], item2[:2]) != 0: - return 0 - if not 
rcmp( item1[2], item2[2]): - return 0 - else: - for a,b in map(None, item1, item2): - if hasattr(a,'match') and hasattr(b,'match'): - if not (a.match == b.match and a.translate == b.translate): - return 0 - elif a != b: - return 0 - return 1 - - + if len(item1) != len(item2): + return 0 + if item1[1] in (204,): + if cmp(item1[:2], item2[:2]) != 0: + return 0 + try: + if not rcmp( item1[2][0][item1[2][1]], item2[2][0][item2[2][1]]): + return 0 + except TypeError: + print(item1) + print(item2) + elif item1[1] == 207: + if item2[:2] != item2[:2]: + return 0 + if not rcmp( item1[2], item2[2]): + return 0 + else: + for a,b in zip(item1, item2): + if hasattr(a,'match') and hasattr(b,'match'): + if not (a.match == b.match and a.translate == b.translate): + return 0 + elif a != b: + return 0 + return 1 + + class OptimisationTests(unittest.TestCase): - def testTermCompression( self ): - """Test that unreported productions are compressed + def testTermCompression( self ): + """Test that unreported productions are compressed - Term compression is basically an inlining of terminal - expressions into the calling table. At the moment - the terminal expressions are all duplicated, which may - balloon the size of the grammar, not sure if this will - be an actual problem. As written, this optimization - should provide a significant speed up, but there may - the even more of a speed up if we allow for sharing - the terminal tuples as well. - - This: - a:=b := -c* c:='this' - Should eventually compress to this: - a := -'this'* - """ - failures = [] - for first, second in [ - ("""a:=b := -c* c:='this'""", """a := -'this'*"""), - ("""a:=b >b<:= c c:= 'this'""", """a := c c:= 'this'"""), - ("""a:=b >b<:= c := 'this'""", """a := 'this'"""), - ("""a:=b >b<:= c+ := 'this'""", """a := 'this'+"""), - # The following will never work, so eventually may raise - # an error or at least give a warning! - ("""a:=b,c >b<:= c+ := 'this'""", """a := 'this'+,'this'"""), - ("""a:=b/c >b<:= c+ := 'this'""", """a := 'this'+/'this'"""), - # This is requiring group-compression, which isn't yet written - ("""a:=-b/c >b<:= c+ := 'this'""", """a := -'this'+/'this'"""), - ("""a := (table1 / table2 / any_line)* + Term compression is basically an inlining of terminal + expressions into the calling table. At the moment + the terminal expressions are all duplicated, which may + balloon the size of the grammar, not sure if this will + be an actual problem. As written, this optimization + should provide a significant speed up, but there may + the even more of a speed up if we allow for sharing + the terminal tuples as well. + + This: + a:=b := -c* c:='this' + Should eventually compress to this: + a := -'this'* + """ + failures = [] + for first, second in [ + ("""a:=b := -c* c:='this'""", """a := -'this'*"""), + ("""a:=b >b<:= c c:= 'this'""", """a := c c:= 'this'"""), + ("""a:=b >b<:= c := 'this'""", """a := 'this'"""), + ("""a:=b >b<:= c+ := 'this'""", """a := 'this'+"""), + # The following will never work, so eventually may raise + # an error or at least give a warning! 
+ ("""a:=b,c >b<:= c+ := 'this'""", """a := 'this'+,'this'"""), + ("""a:=b/c >b<:= c+ := 'this'""", """a := 'this'+/'this'"""), + # This is requiring group-compression, which isn't yet written + ("""a:=-b/c >b<:= c+ := 'this'""", """a := -'this'+/'this'"""), + ("""a := (table1 / table2 / any_line)* := ANY*, EOL := -EOL := '\n' @@ -80,46 +82,46 @@ table1 := 'a' table2 := 'b' """), - ("""a:= b,c := -c* := '\n'""", """a := -'\n'*,'\n'"""), - - ]: - pFirst = Parser( first, "a") - pSecond = Parser( second, "a") - tFirst = pFirst.buildTagger() - tSecond = pSecond.buildTagger() - if not rcmp( tFirst , tSecond): - tFirstRepr = pprint.pformat(tFirst) - tSecondRepr = pprint.pformat(tSecond) - failures.append( """%(first)r did not produce the same parser as %(second)r\n\t%(tFirstRepr)s\n\t%(tSecondRepr)s"""%locals()) - if failures: - raise ValueError( "\n".join(failures)) - def testTermSharing( self ): - """Test that shared terminal productions are using the same parser""" - first =""" a := b,b >b<:= d d:= 'this'""" - pFirst = Parser( first, "a") - tFirst = pFirst.buildTagger() - b,c = tFirst - assert b is c, """Not sharing the same tuple for b and c instances""" - def testNoReportPassDown( self ): - """Test that a non-reporting production does not produce reporting sub-productions""" - first =""" a := b := d,e d:= e e:= 'this'""" - second =""" a := 'this' """ - assert Parser( first, 'a').parse( 'thisthis' ) == (1,[ - ],8) - - def testNameCollapseForSimple( self ): - """Test that a name reference, given a single-item reporting avoids extra table""" - first =""" a := b,b b:= 'this'""" - # The result should be... - expected = ( ('b',21,'this'),('b',21,'this')) - table = Parser( first, 'a').buildTagger( ) - assert table == expected, "%s != %s"%( - pprint.pformat( table), - pprint.pformat(expected), - ) - + ("""a:= b,c := -c* := '\n'""", """a := -'\n'*,'\n'"""), + + ]: + pFirst = Parser( first, "a") + pSecond = Parser( second, "a") + tFirst = pFirst.buildTagger() + tSecond = pSecond.buildTagger() + if not rcmp( tFirst , tSecond): + tFirstRepr = pprint.pformat(tFirst) + tSecondRepr = pprint.pformat(tSecond) + failures.append( """%(first)r did not produce the same parser as %(second)r\n\t%(tFirstRepr)s\n\t%(tSecondRepr)s"""%locals()) + if failures: + raise ValueError( "\n".join(failures)) + def testTermSharing( self ): + """Test that shared terminal productions are using the same parser""" + first =""" a := b,b >b<:= d d:= 'this'""" + pFirst = Parser( first, "a") + tFirst = pFirst.buildTagger() + b,c = tFirst + assert b is c, """Not sharing the same tuple for b and c instances""" + def testNoReportPassDown( self ): + """Test that a non-reporting production does not produce reporting sub-productions""" + first =""" a := b := d,e d:= e e:= 'this'""" + second =""" a := 'this' """ + assert Parser( first, 'a').parse( 'thisthis' ) == (1,[ + ],8) + + def testNameCollapseForSimple( self ): + """Test that a name reference, given a single-item reporting avoids extra table""" + first =""" a := b,b b:= 'this'""" + # The result should be... 
+ expected = ( ('b',21,'this'),('b',21,'this')) + table = Parser( first, 'a').buildTagger( ) + assert table == expected, "%s != %s"%( + pprint.pformat( table), + pprint.pformat(expected), + ) + def getSuite(): - return unittest.makeSuite(OptimisationTests,'test') + return unittest.makeSuite(OptimisationTests,'test') if __name__ == "__main__": - unittest.main(defaultTest="getSuite") + unittest.main(defaultTest="getSuite") diff -Nru simpleparse-2.1.0a1/tests/test_printers.py simpleparse-2.2.0/tests/test_printers.py --- simpleparse-2.1.0a1/tests/test_printers.py 2006-02-19 00:57:40.000000000 +0000 +++ simpleparse-2.2.0/tests/test_printers.py 2015-11-11 19:01:43.000000000 +0000 @@ -3,35 +3,59 @@ This just uses the simpleparsegrammar declaration, which is parsed, then linearised, then loaded as a Python module. """ -import os, unittest -import test_grammarparser +import os, unittest, shutil +from . import test_grammarparser +try: + reload +except NameError: + from importlib import reload testModuleFile = 'test_printers_garbage.py' +HERE = os.path.dirname(__file__) +TEST_DIR = os.path.join( HERE, 'tempmodules' ) +def setUp(self): + if os.path.exists(TEST_DIR): + shutil.rmtree( TEST_DIR ) + os.makedirs(TEST_DIR) + open( + os.path.join(TEST_DIR,'__init__.py'), + 'w' + ).close() +def tearDown(self): + shutil.rmtree( TEST_DIR ) + class PrintersTests(test_grammarparser.SimpleParseGrammarTests): - def setUp( self ): - from simpleparse import simpleparsegrammar, parser, printers, baseparser - p = parser.Parser( simpleparsegrammar.declaration, 'declarationset') - open(testModuleFile,'w').write(printers.asGenerator( p._generator )) - import test_printers_garbage - reload( test_printers_garbage ) - - class RParser( test_printers_garbage.Parser, baseparser.BaseParser ): - pass + def setUp( self ): + from simpleparse import simpleparsegrammar, parser, printers, baseparser + name = self.id().split('.')[-1] + filename = name + '.py' + + testModuleFile = os.path.join(TEST_DIR,filename) + + p = parser.Parser( simpleparsegrammar.declaration, 'declarationset') + with open(testModuleFile,'w') as fh: + fh.write(printers.asGenerator( p._generator )) + mod_name = '%s.tempmodules.%s'%(__name__.rsplit('.',1)[0],name,) + test_printers_garbage = __import__( mod_name,{},{},mod_name.split('.') ) + reload( test_printers_garbage ) + + class RParser( test_printers_garbage.Parser, baseparser.BaseParser ): + pass - self.recursiveParser = RParser() - def tearDown( self ): - try: - os.remove( testModuleFile ) - except IOError, err: - pass - def doBasicTest(self, parserName, testValue, expected, ): - result = self.recursiveParser.parse( testValue, production=parserName ) - assert result == expected, '''\nexpected:%s\n got:%s\n'''%( expected, result ) + self.recursiveParser = RParser() + def tearDown( self ): + try: + os.remove( testModuleFile ) + except (OSError,IOError): + pass + def doBasicTest(self, parserName, testValue, expected, ): + result = self.recursiveParser.parse( testValue, production=parserName ) + assert result == expected, '''\nexpected:%s\n got:%s\n'''%( expected, result ) def getSuite(): - return unittest.makeSuite(PrintersTests,'test') + return unittest.makeSuite(PrintersTests,'test') if __name__ == "__main__": - unittest.main(defaultTest="getSuite") - + unittest.main(defaultTest="getSuite") + diff -Nru simpleparse-2.1.0a1/tests/test.py simpleparse-2.2.0/tests/test.py --- simpleparse-2.1.0a1/tests/test.py 2006-02-19 00:30:43.000000000 +0000 +++ simpleparse-2.2.0/tests/test.py 1970-01-01 00:00:00.000000000 +0000 
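[Editor's note on the test_printers.py hunk above] The rewritten setUp no longer rewrites a single test_printers_garbage.py in the working directory; it serialises each generated parser into its own module under tests/tempmodules and imports it by dotted name. A minimal sketch of that generate-write-import round trip, assuming a placeholder output directory and module name (the exact paths and the __import__/reload call in the patch differ):

    import os
    from simpleparse import simpleparsegrammar, parser, printers

    # Parse the SimpleParse grammar itself, then serialise the resulting
    # generator back to Python source, as the patched setUp does.
    p = parser.Parser(simpleparsegrammar.declaration, 'declarationset')
    module_source = printers.asGenerator(p._generator)

    # Placeholder location; the patched tests write tests/tempmodules/<testname>.py
    out_dir = 'tempmodules'
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    with open(os.path.join(out_dir, 'generated_example.py'), 'w') as handle:
        handle.write(module_source)

The generated module exposes a Parser class, which the tests then mix with baseparser.BaseParser (the RParser class in the hunk) before calling parse on it.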
@@ -1,42 +0,0 @@ -import unittest -import mx_test, test_objectgenerator, test_simpleparsegrammar -import test_common_chartypes, test_common_numbers -import sys -try: - import test_common_iso_date -except ImportError: - sys.stderr.write( """Unable to test ISO dates, no mxDateTime module\n""" ) - test_common_iso_date = None -import test_common_strings, test_printers, test_optimisation, test_common_comments -import test_xml - -import string -from simpleparse.stt import TextTools -mxVersion = tuple(string.split( TextTools.__version__, '.')) -mxVersion = mxVersion[:3] - -def getSuite(): - set = [] - for module in [ - mx_test, - test_objectgenerator, - test_simpleparsegrammar, - test_common_chartypes, - test_common_numbers, - test_common_iso_date, - test_common_strings, - test_common_comments, - test_printers, - test_xml, - test_optimisation, - ]: - if hasattr( module, 'getSuite'): - set.append( module.getSuite() ) - elif module: - sys.stderr.write( "module %r has no getSuite function, skipping\n"%(module,)) - return unittest.TestSuite( - set - ) - -if __name__ == "__main__": - unittest.main(defaultTest="getSuite") diff -Nru simpleparse-2.1.0a1/tests/test_simpleparsegrammar.py simpleparse-2.2.0/tests/test_simpleparsegrammar.py --- simpleparse-2.1.0a1/tests/test_simpleparsegrammar.py 2006-02-18 23:15:48.000000000 +0000 +++ simpleparse-2.2.0/tests/test_simpleparsegrammar.py 2015-11-11 19:01:43.000000000 +0000 @@ -1,229 +1,231 @@ -import unittest, pprint +import unittest, sys from simpleparse.parser import Parser from simpleparse.stt.TextTools import TextTools -from genericvalues import NullResult, AnyInt - +from .genericvalues import NullResult, AnyInt +from . import test_grammarparser +from . import test_erroronfail + class ParserGenerationTests(unittest.TestCase): - def doBasicTest(self, definition, parserName, testValue, expected, ): - result = Parser( definition).parse( testValue, parserName ) - assert result == expected, '''\nexpected:%s\n got:%s\n'''%( expected, result ) - def testGenNegRange1( self ): - self.doBasicTest( - '''s := - something * - := [ab]''', - 's', - 'mmmab', - (1,[],3) - ) - def testGenNegRange2( self ): - self.doBasicTest( - '''s := - something - := [ab]''', - 's', - 'mmmab', - (1,[],1) - ) - def testGenNegLit1( self ): - self.doBasicTest( - '''s := - something * - := "a"''', - 's', - 'mmmab', - (1,[],3) - ) - def testGenPosReptOpt1( self ): - self.doBasicTest( - '''s := something * - something := "a" ''', - 's', - 'aammmab', - (1,[("something",0,1,NullResult),("something",1,2,NullResult)],2) - ) - def testGenPosReptOpt2( self ): - self.doBasicTest( - '''s := something * - something := "a" ''', - 's', - 'mmmab', - (1,[],0) - ) - def testGenPosRept1( self ): - self.doBasicTest( - '''s := something + - something := "a" ''', - 's', - 'mmmab', - (0,[],AnyInt) - ) - - def testLookaheadPositive( self ): - self.doBasicTest( - '''s := ?"b" - ''', - 's', - 'bbbba', - (1,[ - ],0) - ) - def testLookaheadNeg( self ): - self.doBasicTest( - '''s := ?-"b" - ''', - 's', - 'bbbba', - (0,[ - ],AnyInt) - ) - def testLookaheadNeg2( self ): - self.doBasicTest( - '''s := ?-"b"? 
- ''', - 's', - 'bbbba', - (1,[ - ],0) - ) - def testLookaheadNeg3( self ): - self.doBasicTest( - '''s := "b", ?-"a" - ''', - 's', - 'bbbba', - (1,[ - ],1) - ) - def testLookaheadNeg4( self ): - self.doBasicTest( - '''s := "b", ?-"a", "ba" - ''', - 's', - 'bba', - (1,[ - ],3) - ) - def testLookaheadNeg5( self ): - self.doBasicTest( - '''s := ?-t, "ba" - t := "bad" - ''', - 's', - 'bac', - (1,[ - ],2) - ) - def testLookaheadNeg6( self ): - self.doBasicTest( - '''s := ?-t, "ba" - t := "bad" - ''', - 's', - 'bad', - (0,[ - ],AnyInt) - ) - def testLookahead2( self ): - """Test lookahead on literals (more complex)""" - self.doBasicTest( - '''s := something+, "ba" - something := "b",?-"a" - ''', - 's', - 'bbbba', - (1,[ - ("something",0,1,NullResult), - ("something",1,2,NullResult), - ("something",2,3,NullResult), - ],5) - ) - def testLookahead3( self ): - """Test lookahead on reported positive productions""" - self.doBasicTest( - '''s := ?trailer - trailer := "bad" - ''', - 's', - 'badba', - (1,[ - ("trailer",0,3,NullResult), - ],0) - ) - def testLookahead4( self ): - self.doBasicTest( - '''s := ?-trailer? - trailer := "bad" - ''', - 's', - 'badba', - (1,[ - ],0) - ) - - def testLookahead5( self ): - self.doBasicTest( - '''s := ?-trailer, 'ba' - trailer := "bad" - ''', - 's', - 'babba', - (1,[ - ],2) - ) - def testLookahead6( self ): - self.doBasicTest( - '''s := ?-trailer, 'ba' - trailer := "bad" - ''', - 's', - 'badba', - (0,[ - ],AnyInt) - ) - - def testGenPos1( self ): - self.doBasicTest( - '''s := something - something := "a" ''', - 's', - 'mmmab', - (0,[],AnyInt) - ) - def testGenPos2( self ): - self.doBasicTest( - '''s := something - something := "a" ''', - 's', - 'ammmab', - (1,[('something',0,1,NullResult),],1) - ) - - def testOptionalGroupHitEOF( self ): - """Test optional group hitting an EOF during success run""" - self.doBasicTest( - '''s := something* - something := ("a"/"b") ''', - 's', - 'aa', - (1,[ - ('something',0,1,NullResult), - ('something',1,2,NullResult), - ],2) - ) - def testMultiLineDef( self ): - """Test multi-line definitions""" - self.doBasicTest( - '''s := - something* - something := ( - "a"/ - "b" - ) ''', - 's', - 'aa', - (1,[ - ('something',0,1,NullResult), - ('something',1,2,NullResult), - ],2) - ) + def doBasicTest(self, definition, parserName, testValue, expected, ): + result = Parser( definition).parse( testValue, parserName ) + assert result == expected, '''\nexpected:%s\n got:%s\n'''%( expected, result ) + def testGenNegRange1( self ): + self.doBasicTest( + '''s := - something * + := [ab]''', + 's', + 'mmmab', + (1,[],3) + ) + def testGenNegRange2( self ): + self.doBasicTest( + '''s := - something + := [ab]''', + 's', + 'mmmab', + (1,[],1) + ) + def testGenNegLit1( self ): + self.doBasicTest( + '''s := - something * + := "a"''', + 's', + 'mmmab', + (1,[],3) + ) + def testGenPosReptOpt1( self ): + self.doBasicTest( + '''s := something * + something := "a" ''', + 's', + 'aammmab', + (1,[("something",0,1,NullResult),("something",1,2,NullResult)],2) + ) + def testGenPosReptOpt2( self ): + self.doBasicTest( + '''s := something * + something := "a" ''', + 's', + 'mmmab', + (1,[],0) + ) + def testGenPosRept1( self ): + self.doBasicTest( + '''s := something + + something := "a" ''', + 's', + 'mmmab', + (0,[],AnyInt) + ) + + def testLookaheadPositive( self ): + self.doBasicTest( + '''s := ?"b" + ''', + 's', + 'bbbba', + (1,[ + ],0) + ) + def testLookaheadNeg( self ): + self.doBasicTest( + '''s := ?-"b" + ''', + 's', + 'bbbba', + (0,[ + ],AnyInt) + ) + def 
testLookaheadNeg2( self ): + self.doBasicTest( + '''s := ?-"b"? + ''', + 's', + 'bbbba', + (1,[ + ],0) + ) + def testLookaheadNeg3( self ): + self.doBasicTest( + '''s := "b", ?-"a" + ''', + 's', + 'bbbba', + (1,[ + ],1) + ) + def testLookaheadNeg4( self ): + self.doBasicTest( + '''s := "b", ?-"a", "ba" + ''', + 's', + 'bba', + (1,[ + ],3) + ) + def testLookaheadNeg5( self ): + self.doBasicTest( + '''s := ?-t, "ba" + t := "bad" + ''', + 's', + 'bac', + (1,[ + ],2) + ) + def testLookaheadNeg6( self ): + self.doBasicTest( + '''s := ?-t, "ba" + t := "bad" + ''', + 's', + 'bad', + (0,[ + ],AnyInt) + ) + def testLookahead2( self ): + """Test lookahead on literals (more complex)""" + self.doBasicTest( + '''s := something+, "ba" + something := "b",?-"a" + ''', + 's', + 'bbbba', + (1,[ + ("something",0,1,NullResult), + ("something",1,2,NullResult), + ("something",2,3,NullResult), + ],5) + ) + def testLookahead3( self ): + """Test lookahead on reported positive productions""" + self.doBasicTest( + '''s := ?trailer + trailer := "bad" + ''', + 's', + 'badba', + (1,[ + ("trailer",0,3,NullResult), + ],0) + ) + def testLookahead4( self ): + self.doBasicTest( + '''s := ?-trailer? + trailer := "bad" + ''', + 's', + 'badba', + (1,[ + ],0) + ) + + def testLookahead5( self ): + self.doBasicTest( + '''s := ?-trailer, 'ba' + trailer := "bad" + ''', + 's', + 'babba', + (1,[ + ],2) + ) + def testLookahead6( self ): + self.doBasicTest( + '''s := ?-trailer, 'ba' + trailer := "bad" + ''', + 's', + 'badba', + (0,[ + ],AnyInt) + ) + + def testGenPos1( self ): + self.doBasicTest( + '''s := something + something := "a" ''', + 's', + 'mmmab', + (0,[],AnyInt) + ) + def testGenPos2( self ): + self.doBasicTest( + '''s := something + something := "a" ''', + 's', + 'ammmab', + (1,[('something',0,1,NullResult),],1) + ) + + def testOptionalGroupHitEOF( self ): + """Test optional group hitting an EOF during success run""" + self.doBasicTest( + '''s := something* + something := ("a"/"b") ''', + 's', + 'aa', + (1,[ + ('something',0,1,NullResult), + ('something',1,2,NullResult), + ],2) + ) + def testMultiLineDef( self ): + """Test multi-line definitions""" + self.doBasicTest( + '''s := + something* + something := ( + "a"/ + "b" + ) ''', + 's', + 'aa', + (1,[ + ('something',0,1,NullResult), + ('something',1,2,NullResult), + ],2) + ) ## def testRepeatOptionalFail( self ): ## """Explicit test of the optional-repeating-child of repeating object ## """ @@ -241,150 +243,168 @@ ## ],24), ## ) - def testGenCILiteral1( self ): - self.doBasicTest( - '''s := c"this"''', - 's', - 'this', - (1,[],4) - ) - def testGenCILiteral2( self ): - self.doBasicTest( - '''s := c"this"''', - 's', - 'This', - (1,[],4) - ) - def testGenCILiteral3( self ): - self.doBasicTest( - '''s := c"this"''', - 's', - 'THIS', - (1,[],4) - ) - def testGenCILiteral4( self ): - self.doBasicTest( - '''s := -c"this"''', - 's', - ' THIS', - (1,[],1) - ) - def testGenCILiteral5( self ): - self.doBasicTest( - '''s := -c"this"''', - 's', - ' thi', - (1,[],1) - ) - def testGenCILiteral6( self ): - self.doBasicTest( - '''s := -c"this"*''', - 's', - ' thi', - (1,[],4) - ) + def testGenCILiteral1( self ): + self.doBasicTest( + '''s := c"this"''', + 's', + 'this', + (1,[],4) + ) + def testGenCILiteral2( self ): + self.doBasicTest( + '''s := c"this"''', + 's', + 'This', + (1,[],4) + ) + def testGenCILiteral3( self ): + self.doBasicTest( + '''s := c"this"''', + 's', + 'THIS', + (1,[],4) + ) + def testGenCILiteral4( self ): + self.doBasicTest( + '''s := -c"this"''', + 's', + ' THIS', + 
(1,[],1) + ) + def testGenCILiteral5( self ): + self.doBasicTest( + '''s := -c"this"''', + 's', + ' thi', + (1,[],1) + ) + def testGenCILiteral6( self ): + self.doBasicTest( + '''s := -c"this"*''', + 's', + ' thi', + (1,[],4) + ) + def testGenUnicodeRange( self ): + self.doBasicTest( + '''s := [\u0600-\u06ff]+''', + 's', + u'\u0600\u06ff', + (1,[],2) + ) + if sys.version_info[0] < 3: + def testGenUnicodeRangeBroken( self ): + self.assertRaises( + ValueError, + self.doBasicTest, + '''s := [a-\u06ff]+''', + 's', + u'\u0600\u06ff', + (1,[],2) + ) + class NameTests(unittest.TestCase): - def doBasicTest(self, definition, parserName, testValue, expected, ): - result = Parser( definition).parse( testValue, production=parserName ) - assert result == expected, '''\nexpected:%s\n got:%s\n'''%( expected, result ) - def test_p( self ): - self.doBasicTest( - '''s := something - something := "a" ''', - 's', - 'ammmab', - (1,[('something',0,1,NullResult),],1) - ) - def test_po( self ): - self.doBasicTest( - '''s := something? - something := "a" ''', - 's', - 'ammmab', - (1,[('something',0,1,NullResult),],1) - ) - def test_por( self ): - self.doBasicTest( - '''s := something* - something := "a" ''', - 's', - 'ammmab', - (1,[('something',0,1,NullResult),],1) - ) - def test_pr( self ): - self.doBasicTest( - '''s := something+ - something := "a" ''', - 's', - 'ammmab', - (1,[('something',0,1,NullResult),],1) - ) - - def test_n( self ): - self.doBasicTest( - '''s := - something - := [ab]''', - 's', - 'mmmab', - (1,[],1) - ) - def test_no( self ): - self.doBasicTest( - '''s := - something? - := [ab]''', - 's', - 'mmmab', - (1,[],1) - ) - def test_nor( self ): - self.doBasicTest( - '''s := - something* - := [ab]''', - 's', - 'mmmab', - (1,[],3) - ) - def test_nr( self ): - self.doBasicTest( - '''s := - something+ - := [ab]''', - 's', - 'mmmab', - (1,[],3) - ) - def test_n_f( self ): - self.doBasicTest( - '''s := - something - := [ab]''', - 's', - 'ammmab', - (0,[],AnyInt) - ) - def test_no_f( self ): - self.doBasicTest( - '''s := - something? - := [ab]''', - 's', - 'ammmab', - (1,[],0) - ) - def test_nor_f( self ): - self.doBasicTest( - '''s := - something* - := [ab]''', - 's', - 'ammmab', - (1,[],0) - ) - def test_nr_f( self ): - self.doBasicTest( - '''s := - something + - := [ab]''', - 's', - 'ammmab', - (0,[],AnyInt) - ) + def doBasicTest(self, definition, parserName, testValue, expected, ): + result = Parser( definition).parse( testValue, production=parserName ) + assert result == expected, '''\nexpected:%s\n got:%s\n'''%( expected, result ) + def test_p( self ): + self.doBasicTest( + '''s := something + something := "a" ''', + 's', + 'ammmab', + (1,[('something',0,1,NullResult),],1) + ) + def test_po( self ): + self.doBasicTest( + '''s := something? + something := "a" ''', + 's', + 'ammmab', + (1,[('something',0,1,NullResult),],1) + ) + def test_por( self ): + self.doBasicTest( + '''s := something* + something := "a" ''', + 's', + 'ammmab', + (1,[('something',0,1,NullResult),],1) + ) + def test_pr( self ): + self.doBasicTest( + '''s := something+ + something := "a" ''', + 's', + 'ammmab', + (1,[('something',0,1,NullResult),],1) + ) + + def test_n( self ): + self.doBasicTest( + '''s := - something + := [ab]''', + 's', + 'mmmab', + (1,[],1) + ) + def test_no( self ): + self.doBasicTest( + '''s := - something? 
+ := [ab]''', + 's', + 'mmmab', + (1,[],1) + ) + def test_nor( self ): + self.doBasicTest( + '''s := - something* + := [ab]''', + 's', + 'mmmab', + (1,[],3) + ) + def test_nr( self ): + self.doBasicTest( + '''s := - something+ + := [ab]''', + 's', + 'mmmab', + (1,[],3) + ) + def test_n_f( self ): + self.doBasicTest( + '''s := - something + := [ab]''', + 's', + 'ammmab', + (0,[],AnyInt) + ) + def test_no_f( self ): + self.doBasicTest( + '''s := - something? + := [ab]''', + 's', + 'ammmab', + (1,[],0) + ) + def test_nor_f( self ): + self.doBasicTest( + '''s := - something* + := [ab]''', + 's', + 'ammmab', + (1,[],0) + ) + def test_nr_f( self ): + self.doBasicTest( + '''s := - something + + := [ab]''', + 's', + 'ammmab', + (0,[],AnyInt) + ) ## def test_por_big( self ): ## """This test creates 1,000,000 result tuples (very inefficiently, I might add)... ## on my machine that takes a long time, so I do not bother with the test @@ -400,122 +420,120 @@ ## ],1000000) ## ) - def test_expanded_name( self ): - """Non-reporting (expanded) name test + def test_expanded_name( self ): + """Non-reporting (expanded) name test - Tests new feature, a name whose children - are reported, but which is not itself reported, - basically this lets you create anonymous - groups which can be referenced from other - productions. - """ - self.doBasicTest( - '''s := something + - >something< := r - r := [ab] - v := [c] - ''', - 's', - 'abammmab', - (1,[ - ('r',0,1, NullResult), - ('r',1,2, NullResult), - ('r',2,3, NullResult), - ],3) - ) - - def test_expanded_SingleNameChild( self ): - """Expanded group with single child which is a Name itself - - This originally failed when the Name object's report value - was changed to 0 (redundant information for the "expanded" code), - resulting in the child production not getting reported. - """ - self.doBasicTest( - '''s := something + - something := r - r := [ab]''', - 'something', - 'abammmab', - (1,[ - ('r',0,1, NullResult), - ],1) - ) + Tests new feature, a name whose children + are reported, but which is not itself reported, + basically this lets you create anonymous + groups which can be referenced from other + productions. + """ + self.doBasicTest( + '''s := something + + >something< := r + r := [ab] + v := [c] + ''', + 's', + 'abammmab', + (1,[ + ('r',0,1, NullResult), + ('r',1,2, NullResult), + ('r',2,3, NullResult), + ],3) + ) + + def test_expanded_SingleNameChild( self ): + """Expanded group with single child which is a Name itself + + This originally failed when the Name object's report value + was changed to 0 (redundant information for the "expanded" code), + resulting in the child production not getting reported. 
+ """ + self.doBasicTest( + '''s := something + + something := r + r := [ab]''', + 'something', + 'abammmab', + (1,[ + ('r',0,1, NullResult), + ],1) + ) class BasicMethodSource: - def __init__( self ): - self.results = [] - def _m_a( self, taglist,text,l,r,subtags ): - self.results.append( ('a',text[l:r])) - def _m_b( self, taglist, text, l,r,subtags): - self.results.append( ('b',l,r) ) - _m_c = TextTools.AppendMatch - _m_d = TextTools.AppendTagobj - _o_d = "hello world" + def __init__( self ): + self.results = [] + def _m_a( self, taglist,text,l,r,subtags ): + self.results.append( ('a',text[l:r])) + def _m_b( self, taglist, text, l,r,subtags): + self.results.append( ('b',l,r) ) + _m_c = TextTools.AppendMatch + _m_d = TextTools.AppendTagobj + _o_d = "hello world" class AppendToTagobjMethodSource: - def __init__( self ): - self._o_d = [] - _m_d = TextTools.AppendToTagobj + def __init__( self ): + self._o_d = [] + _m_d = TextTools.AppendToTagobj class CallTests(unittest.TestCase): - """Tests semantics of calling objects from a method source during parsing""" - def parse( self, definition, parserName, testValue, source): - result = Parser( - definition, - ).parse(testValue, production=parserName, processor = source) - return result - def test_basic_call( self ): - """Test basic ability to call a method instead of regular functioning""" - source = BasicMethodSource() - self.parse( """ - x := (a/b)* - a := "a" - b := "b" - """, 'x', 'abba', source) - assert source.results == [ ('a','a'),('b',1,2),('b',2,3),('a','a'),], """Method source methods were not called, or called improperly:\n%s"""%(source.results,) - - def test_AppendMatch( self ): - """Test ability to append the text-string match to the results list""" - source = BasicMethodSource() - result = self.parse( """ - x := c* - c := 'c' - """, 'x', 'ccc', source) - assert result == (1,[ - 'c','c','c', - ],3), """Result was %s"""%( result, ) - - def test_AppendTagObj( self ): - """Test appending the tagobject to the results list""" - source = BasicMethodSource() - result = self.parse( """ - x := d* - d := 'd' - """, 'x', 'ddd', source) - assert result == (1,[ - "hello world","hello world","hello world", - ],3) - - def test_AppendToTagObj( self ): - """Test basic ability to call a method instead of regular functioning""" - source = AppendToTagobjMethodSource() - result = self.parse( """ - x := d* - d := 'd' - """, 'x', 'ddd', source) - assert source._o_d == [ (None,0,1,NullResult),(None,1,2,NullResult),(None,2,3,NullResult)], """Method source methods were not called, or called improperly:\n%s"""%(source._o_d,) + """Tests semantics of calling objects from a method source during parsing""" + def parse( self, definition, parserName, testValue, source): + result = Parser( + definition, + ).parse(testValue, production=parserName, processor = source) + return result + def test_basic_call( self ): + """Test basic ability to call a method instead of regular functioning""" + source = BasicMethodSource() + self.parse( """ + x := (a/b)* + a := "a" + b := "b" + """, 'x', 'abba', source) + assert source.results == [ ('a','a'),('b',1,2),('b',2,3),('a','a'),], """Method source methods were not called, or called improperly:\n%s"""%(source.results,) + + def test_AppendMatch( self ): + """Test ability to append the text-string match to the results list""" + source = BasicMethodSource() + result = self.parse( """ + x := c* + c := 'c' + """, 'x', 'ccc', source) + assert result == (1,[ + 'c','c','c', + ],3), """Result was %s"""%( result, ) + + def 
test_AppendTagObj( self ): + """Test appending the tagobject to the results list""" + source = BasicMethodSource() + result = self.parse( """ + x := d* + d := 'd' + """, 'x', 'ddd', source) + assert result == (1,[ + "hello world","hello world","hello world", + ],3) + + def test_AppendToTagObj( self ): + """Test basic ability to call a method instead of regular functioning""" + source = AppendToTagobjMethodSource() + self.parse( """ + x := d* + d := 'd' + """, 'x', 'ddd', source) + assert source._o_d == [ (None,0,1,NullResult),(None,1,2,NullResult),(None,2,3,NullResult)], """Method source methods were not called, or called improperly:\n%s"""%(source._o_d,) -import test_grammarparser -import test_erroronfail def getSuite(): - return unittest.TestSuite(( - test_grammarparser.getSuite(), - test_erroronfail.getSuite(), - unittest.makeSuite(ParserGenerationTests, 'test'), - unittest.makeSuite(NameTests, 'test'), - unittest.makeSuite(CallTests, 'test'), - )) + return unittest.TestSuite(( + test_grammarparser.getSuite(), + test_erroronfail.getSuite(), + unittest.makeSuite(ParserGenerationTests, 'test'), + unittest.makeSuite(NameTests, 'test'), + unittest.makeSuite(CallTests, 'test'), + )) if __name__ == "__main__": - unittest.main(defaultTest="getSuite") + unittest.main(defaultTest="getSuite") diff -Nru simpleparse-2.1.0a1/tests/test_xml.py simpleparse-2.2.0/tests/test_xml.py --- simpleparse-2.1.0a1/tests/test_xml.py 2002-07-23 22:40:43.000000000 +0000 +++ simpleparse-2.2.0/tests/test_xml.py 2015-11-11 18:42:23.000000000 +0000 @@ -1,263 +1,271 @@ -from simpleparse.xml import xml_parser +from simpleparse.xmlparser import xml_parser from simpleparse.parser import Parser -import unittest, string +import unittest +try: + unicode +except NameError: + unicode = str p = Parser( xml_parser.declaration ) class XMLProductionTests(unittest.TestCase): - """Tests that XML grammar productions match appropriate values""" - ### ProductionTests will be added here by loop below... + """Tests that XML grammar productions match appropriate values""" + ### ProductionTests will be added here by loop below... 
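[Editor's note on the CallTests added to test_simpleparsegrammar.py above] Those tests exercise the method-source hooks: when the object passed as processor= defines _m_<production> (either a method or a TextTools primitive bound to that name), it is invoked for each match of that production during parsing. A small hedged sketch of that dispatch; the EchoSource class and its one-line grammar are illustrative, not part of the patch:

    from simpleparse.parser import Parser
    from simpleparse.stt.TextTools import TextTools

    class EchoSource:
        """Illustrative method source for productions "a" and "b"."""
        def __init__(self):
            self.seen = []
        def _m_a(self, taglist, text, l, r, subtags):
            # Called for each "a" match; slice the buffer to get the matched text.
            self.seen.append(text[l:r])
        # Bind a TextTools primitive: each "b" match appends its text to the result list.
        _m_b = TextTools.AppendMatch

    source = EchoSource()
    result = Parser('''x := (a/b)*  a := "a"  b := "b"''').parse(
        'abba', production='x', processor=source)
    # Expected: source.seen collects ['a', 'a'] and result[1] holds the "b" matches.

The BasicMethodSource and AppendToTagobjMethodSource classes in the hunk show the same idea with AppendTagobj and AppendToTagobj bound to _m_d.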
class ProductionTest: - def __init__( self, production, should, shouldnot ): - self.production = production - self.should = should - self.shouldnot = shouldnot - def __call__( self ): - """Perform the test""" - for item in self.should: - success, children, next = p.parse( item, self.production ) - assert success, """Didn't parse %s as a %s, should have"""%( repr(item), self.production) - assert next == len(item), """Didn't parse whole of %s as a %s, parsed %s of %s characters, results were:\n%s\nRest was:\n%s"""%( repr(item), self.production, next, len(item), children, item[next:]) - for item in shouldnot: - success, children, next = p.parse( item, self.production ) - assert not success, """Parsed %s chars of %s as a %s, shouldn't have, result was:\n%s"""%( next, repr(item), self.production, children) + def __init__( self, production, should, shouldnot ): + self.production = production + self.should = should + self.shouldnot = shouldnot + def __call__( self ): + """Perform the test""" + for item in self.should: + if isinstance(item,unicode): + item = item.encode('utf-8') + success, children, next = p.parse( item, self.production ) + assert success, """Didn't parse %s as a %s, should have"""%( repr(item), self.production) + assert next == len(item), """Didn't parse whole of %s as a %s, parsed %s of %s characters, results were:\n%s\nRest was:\n%s"""%( repr(item), self.production, next, len(item), children, item[next:]) + for item in shouldnot: + if isinstance(item,unicode): + item = item.encode('utf-8') + success, children, next = p.parse( item, self.production ) + assert not success, """Parsed %s chars of %s as a %s, shouldn't have, result was:\n%s"""%( next, repr(item), self.production, children) def getSuite(): - return unittest.makeSuite(XMLProductionTests, 'test') + return unittest.makeSuite(XMLProductionTests, 'test') testData = { - "CharData":( - [# should match - """Type """, - ], - [# should not match - ], - ), - "Attribute":( - [# should match - """s=&this;""", - '''s="&this;"''', - """&this;""", - ], - [# should not match - # unfinished elements - ], - ), - - "element":( - [# should match - """""", - """""", - """""", - """""", - """""", - ], - [# should not match - # unfinished elements - """""", - """""", - """""", - # end with no start... - """""", - # malformed end tags - """""", - """""", - ], - ), - - "content":( - [# should match - """Type less-than (<) to save options. + "CharData":( + [# should match + """Type """, + ], + [# should not match + ], + ), + "Attribute":( + [# should match + """s=&this;""", + '''s="&this;"''', + """&this;""", + ], + [# should not match + # unfinished elements + ], + ), + + "element":( + [# should match + """""", + """""", + """""", + """""", + """""", + ], + [# should not match + # unfinished elements + """""", + """""", + """""", + # end with no start... + """""", + # malformed end tags + """""", + """""", + ], + ), + + "content":( + [# should match + """Type less-than (<) to save options. This document was prepared on &docdate; and is classified &security-level;.""", - """""", - """""", - """""", - """""", - """&this;""", - """""", - ], - [# should not match - # unfinished elements - """""", - """""", - """""", - # end with no start... 
- """""", - # malformed end tags - """""", - """""", - ], - ), - "AttValue":( - [# should match - '''"&this;"''', - ], - [# should not match - ], - ), - - "Name": ( - [# should match - "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-:._", - "_a", - ":a", - ":a", - ], - [# should not match - "-a", - "0", - "0.0", - ".this", - ], - ), - "Comment": ( - [# should match - "", - "", - "", - "", - "", - "", - ], - [# should not match - "", - "", - "", - ], - ), - "prolog": ( - [ # should match - """ """, - """ - - ]>""", - """""", - """ - - - ]>""", + """""", + """""", + """""", + """""", + """&this;""", + """""", + ], + [# should not match + # unfinished elements + """""", + """""", + """""", + # end with no start... + """""", + # malformed end tags + """""", + """""", + ], + ), + "AttValue":( + [# should match + '''"&this;"''', + ], + [# should not match + ], + ), + + "Name": ( + [# should match + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-:._", + "_a", + ":a", + ":a", + ], + [# should not match + "-a", + "0", + "0.0", + ".this", + ], + ), + "Comment": ( + [# should match + "", + "", + "", + "", + "", + "", + ], + [# should not match + "", + "", + "", + ], + ), + "prolog": ( + [ # should match + """ """, + """ + + ]>""", + """""", + """ + + + ]>""", - """ - - - - %ISOLat2; - ]>""", - - ], - [ # should not match - ], - ), - - "ExternalID": ( - [# should match - '''SYSTEM "hello.dtd"''', - ], - [# should not match - ], - ), - "elementdecl": ( - [# should match - '''''', - """""", - """""", - """""", - """""", - """""", - """""", - """""", - ], - [# should not match - """""", - ], - ), - "elementdecl_pe": ( - [# should match - """ %name.para; %content.para;""", - ], - [# should not match - ], - ), - - "contentspec": ( - [# should match - '''EMPTY''', - '''ANY''', - '''%content.para;''', - ], - [# should not match - ], - ), - - "AttlistDecl": ( - [# should match - ''' + + + + %ISOLat2; + ]>""", + + ], + [ # should not match + ], + ), + + "ExternalID": ( + [# should match + '''SYSTEM "hello.dtd"''', + ], + [# should not match + ], + ), + "elementdecl": ( + [# should match + '''''', + """""", + """""", + """""", + """""", + """""", + """""", + """""", + ], + [# should not match + """""", + ], + ), + "elementdecl_pe": ( + [# should match + """ %name.para; %content.para;""", + ], + [# should not match + ], + ), + + "contentspec": ( + [# should match + '''EMPTY''', + '''ANY''', + '''%content.para;''', + ], + [# should not match + ], + ), + + "AttlistDecl": ( + [# should match + '''''', - """""", - """""", - ], - [# should not match - ], - ), - "AttDef": ( - [# should match - ''' id ID #REQUIRED''', - """ name CDATA #IMPLIED""", - ''' type (bullets|ordered|glossary) "ordered"''', - ''' method CDATA #FIXED "POST"''', - ], - [# should not match - ], - ), - - "EntityDecl": ( - [ - """""", - """""", + """""", - """""", - """""", - ], - [# should not match - ], - ), - "EntityDef":( - [ - '''PUBLIC "-//Textuality//TEXT Standard open-hatch boilerplate//EN" + ], + [# should not match + ], + ), + "EntityDef":( + [ + '''PUBLIC "-//Textuality//TEXT Standard open-hatch boilerplate//EN" "http://www.textuality.com/boilerplate/OpenHatch.xml"''', - ], - [# should not match - ], - ), - "PubidLiteral":( - [ - '''"-//Textuality//TEXT Standard open-hatch boilerplate//EN"''', - ], - [# should not match - ], - ), + ], + [# should not match + ], + ), + "PubidLiteral":( + [ + '''"-//Textuality//TEXT Standard open-hatch boilerplate//EN"''', + ], + [# should not match + ], + ), } -for production, 
(should,shouldnot) in testData.items(): - setattr( XMLProductionTests, 'test'+production, ProductionTest(production, should, shouldnot)) +for production, (should,shouldnot) in list(testData.items()): + setattr( XMLProductionTests, 'test'+production, ProductionTest(production, should, shouldnot)) if __name__ == "__main__": - unittest.main(defaultTest="getSuite") + unittest.main(defaultTest="getSuite") diff -Nru simpleparse-2.1.0a1/tox.ini simpleparse-2.2.0/tox.ini --- simpleparse-2.1.0a1/tox.ini 1970-01-01 00:00:00.000000000 +0000 +++ simpleparse-2.2.0/tox.ini 2015-11-11 18:55:09.000000000 +0000 @@ -0,0 +1,7 @@ +[tox] +# py35 is broken on Ubuntu at the moment +envlist=py27,py34,py26 + +[testenv] +deps=nose +commands=nosetests -w tests diff -Nru simpleparse-2.1.0a1/xml/__init__.py simpleparse-2.2.0/xml/__init__.py --- simpleparse-2.1.0a1/xml/__init__.py 2002-07-17 23:03:23.000000000 +0000 +++ simpleparse-2.2.0/xml/__init__.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,11 +0,0 @@ -"""XML Parsing package - -At the moment it's really limited, -but it does the basics, and the rest -is mostly just a matter of fiddling -about with Unicode and CharacterType -support. There is only very minimal -support for Reference types, basically -we note that a Reference exists, but -don't do any further processing of it. -""" diff -Nru simpleparse-2.1.0a1/xml/xml_parser.py simpleparse-2.2.0/xml/xml_parser.py --- simpleparse-2.1.0a1/xml/xml_parser.py 2002-08-06 01:12:01.000000000 +0000 +++ simpleparse-2.2.0/xml/xml_parser.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,200 +0,0 @@ -"""XML Parser based (loosely) on the XML Spec's EBNF - -This is a hand-coded parser based on the W3C's XML specification, -there was a lot of busy-work rewriting to make the syntax agree, -but also a number of signficant structural changes required by -the limitations of the SimpleParse engine, and the completely -procedural definition of References in the XML spec (the References -don't occur in most places they can occur, and they are seen as -altering the buffer directly as soon as they are encountered, this -isn't something that fits readily into the mx.TextTools engine. - -http://www.w3.org/TR/REC-xml#sec-references - -Major Deviations from Spec: - No support for the unicode-style character classes - No support for UTF-16 (or Unicode at all, for that matter) - No support for References that alter the production - being parsed, so you can't have a Reference to an - item "and" or similar non-structure- - respecting References. References have - particular locations they can occur, and they are - just ignored elsewhere - No support for parsing the contents of References within - the primary parsing pass - No support for excluded start/end tags - Comments allowed in both tags and declarations (but not - inside content-specifiers). - Allows end tags of the form -""" - -declaration = """ - -# Simple (changable) literals -# These should be chosen based on the encoding -# of the file, which is actually embedded in the -# file :( - - := [\x20\x09\x0D\x0A]+ - := [a-zA-Z] - := letter/[_:] - := letter/digit/[-._:] - - -# don't change for XML, but would change for SGML or HTML - := '=' - := '&' - := '%' - := ';' - := ' := '?>' - := '<' - := '>' - := ' := '>' - := '/>' - - -# an XML-comment, note that this follows -# SGML semantics, so that you can embed comment_sets -# in the middle of the various declarations... 
->Comment< := "" ->comment_set< := '--', xml_comment,'--' -xml_comment := -'--'* - -# whitespace in tag (including possible comment) ->TS< := (Comment/S)+ - - -# general structures -AttValue := ('"', (Reference/ -[&"] )*, '"') / ( "'", (Reference / -[&'])*, "'") - -# Names -Name := namestart, namechar* -Names := Name, (S,Name)* -Nmtoken := namechar+ -Nmtokens := Nmtoken, (S,Nmtoken)* - -# processing instructions -PI := PIO, PITarget, S?, PIContent, PIC -PIContent := -PIC* -PITarget := ?-( [Xx],[Mm],[Ll]), Name - - -## references - # character reference - CharRef := REFO,'#',('x',hex)/(int),REFC - # entity reference - EntityRef := REFO, Name, REFC - # parsed entity ref - PEReference := PREFO, Name, REFC - -Reference := EntityRef / CharRef - -Misc := Comment/S - -### PROLOG definitions... - - prolog := XMLDecl?, Misc*, (doctypedecl, Misc*)? - XMLDecl := '' - VersionInfo := TS?, 'version', TS?, Eq, TS?, (('"',VersionNum,'"')/("'",VersionNum,"'")) - VersionNum := [a-zA-Z0-9_.:-]+ - - -### Document-type declarations (DTDs) - - doctypedecl := '' - - DeclSep := PEReference / S - markupdecl := elementdecl / AttlistDecl / EntityDecl / NotationDecl / PI / Comment - - EncodingDecl := TS, 'encoding', Eq, (('"', EncName, '"') / ("'", EncName, "'") ) - EncName := [A-Za-z],[A-Za-z0-9._-]* - SDDecl := TS, 'standalone', Eq, (("'", ('yes' / 'no'), "'") / ('"', ('yes' / 'no'), '"')) - - ExternalID := ('SYSTEM', TS?, SystemLiteral) / ('PUBLIC', TS?, PubidLiteral, TS?, SystemLiteral ) / PEReference - NDataDecl := (TS, 'NDATA', TS, Name)/ (TS,PEReference,TS,(Name/ PEReference)?) - - SystemLiteral := ('"', -["]*, '"') / ("'", -[']*, "'") / PEReference - PubidLiteral := ('"', [\x20\x0D\x0Aa-zA-Z0-9'()+,./:=?;!*#@$_%-]*, '"') / ("'", [\x20\x0D\x0Aa-zA-Z0-9()+,./:=?;!*#@$_%-]*, "'") / PEReference - - PublicID := ('PUBLIC', TS, PubidLiteral) / PEReference - - -### Element-type declarations - # hack to try and get PEReference parsing for the "normal case" - # where the PEReference doesn't change the production level, which - # seems to be suggested by the spec... - - elementdecl := '' - - >elementdecl_pe< := (TS, PEReference, TS?, contentspec?) - - contentspec := 'EMPTY' / 'ANY' / Mixed / children - Mixed := ('(', S?, '#PCDATA', (S?, '|', S?, (Name/PEReference))*, S?, ')*' ) /('(', S?, '#PCDATA', S?, ')') - - repetition_specifier := ('?' / '*' / '+')? - children := (choice / seq/ PEReference), repetition_specifier - cp := (choice / seq / Name/ PEReference ), repetition_specifier - choice := '(', S?, cp, ( S?, '|', S?, cp )+, S?, ')' - seq := '(', S?, cp, ( S?, ',', S?, cp )*, S?, ')' - - -### Attribute list declarations... - AttlistDecl := '' - AttDef := TS, ((Name, TS, AttType, TS, DefaultDecl)/(PEReference, TS?, AttType?, TS?, DefaultDecl?)) - - - AttType := StringType / TokenizedType / EnumeratedType/ PEReference - StringType := 'CDATA' - TokenizedType := 'ID' / 'IDREF' / 'IDREFS' / 'ENTITY' / 'ENTITIES' / 'NMTOKEN' / 'NMTOKENS' - EnumeratedType := NotationType / Enumeration - NotationType := 'NOTATION', TS, ('(', NameOrList, ')')/PEReference - Enumeration := '(', (NmTokenOrList/PEReference), ')' - - >NameOrList< := S?, (Name/PEReference), (S?, '|', S?, (Name/PEReference))*, S? - >NmTokenOrList< := S?, (Nmtoken/PEReference), (S?, '|', S?, (Nmtoken/PEReference))*, S? 
- - - DefaultDecl := '#REQUIRED' / '#IMPLIED' / ((('#FIXED', TS)/PEReference)?, (AttValue/PEReference)) / PEReference - -### Entity declarations - EntityDecl := GEDecl / PEDecl - GEDecl := '' - PEDecl := '' - EntityDef := EntityValue / (ExternalID, NDataDecl?) / PEReference - PEDef := EntityValue / ExternalID / PEReference - EntityValue := ('"', (PEReference / Reference / -[%&"])*, '"') / ("'", (PEReference / Reference / -[%&'])*, "'") - -NotationDecl := '' - -### elements (nodes/tags/you-know :) ) - # limitations in the SimpleParse engine mean that this - # particular structure will be basically useless... - element := EmptyElemTag / (STag, content, ETag) - - EmptyElemTag := STagO, Name, (TS, Attribute)*, TS?, EmptyElemTagC - - STag := STagO, Name, (TS, Attribute)*, TS?, STagC - ETag := ETagO, Name?, TS?, ETagC - - content := (element / Reference / CDSect / PI / Comment / CharData)* - - Attribute := (Name, Eq, (AttValue/Reference))/(Reference,(Eq,(AttValue/Reference))?) - - # general content of an element - CharData := ( -[<&]+ / -(STag / EmptyElemTag / ETag / Reference / CDSect / PI / Comment) )+ - - # special non-parsed character data sections - CDSect := CDStart, CData, CDEnd - := ' := ']]>' - - -document := prolog, element, Misc* -""" -from simpleparse.common import numbers, strings, chartypes
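[Editor's note on the deleted xml/xml_parser.py above] The grammar itself now lives in simpleparse.xmlparser, which is the module the updated test_xml.py imports. A minimal usage sketch against the relocated module; the production-level check mirrors what ProductionTest asserts for the "Name" entries, while the document-level call at the end is illustrative only:

    from simpleparse.xmlparser import xml_parser
    from simpleparse.parser import Parser

    p = Parser(xml_parser.declaration)

    # Same call shape as the production tests: (success, children, next_position);
    # the updated tests feed byte strings to parse(), so encode first.
    sample = '_a'.encode('utf-8')
    success, children, next_pos = p.parse(sample, 'Name')
    assert success and next_pos == len(sample)

    # A whole document would go through the top-level "document" production, e.g.:
    # p.parse(xml_text, 'document')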