diff -Nru html5lib-0.999999999/AUTHORS.rst html5lib-1.0.1/AUTHORS.rst
--- html5lib-0.999999999/AUTHORS.rst	2016-07-11 23:24:55.000000000 +0000
+++ html5lib-1.0.1/AUTHORS.rst	2017-12-07 12:25:26.000000000 +0000
@@ -6,6 +6,7 @@
 - James Graham
 - Geoffrey Sneddon
 - Łukasz Langa
+- Will Kahn-Greene
 
 
 Patches and suggestions
@@ -23,22 +24,43 @@
 - Philip Taylor
 - Edward Z. Yang
 - fantasai
-- Mike West
 - Philip Jägenstedt
 - Ms2ger
 - Mohammad Taha Jahangir
 - Andy Wingo
-- Juan Carlos Garcia Segovia
 - Andreas Madsack
 - Karim Valiev
+- Juan Carlos Garcia Segovia
+- Mike West
 - Marc DM
+- Simon Sapin
+- Michael[tm] Smith
+- Ritwik Gupta
+- Marc Abramowitz
 - Tony Lopes
 - lilbludevil
-- Simon Sapin
-- Jon Dufresne
+- Kevin
 - Drew Hubl
 - Austin Kumbera
 - Jim Baker
-- Michael[tm] Smith
-- Marc Abramowitz
 - Jon Dufresne
+- Donald Stufft
+- Alex Gaynor
+- Nik Nyby
+- Jakub Wilk
+- Sigmund Cherem
+- Gabi Davar
+- Florian Mounier
+- neumond
+- Vitalik Verhovodov
+- Kovid Goyal
+- Adam Chainz
+- John Vandenberg
+- Eric Amorde
+- Benedikt Morbach
+- Jonathan Vanasco
+- Tom Most
+- Ville Skyttä
+- Hugo van Kemenade
+- Mark Vasilkov
+
diff -Nru html5lib-0.999999999/CHANGES.rst html5lib-1.0.1/CHANGES.rst
--- html5lib-0.999999999/CHANGES.rst	2016-07-15 01:34:57.000000000 +0000
+++ html5lib-1.0.1/CHANGES.rst	2017-12-07 14:09:00.000000000 +0000
@@ -1,6 +1,50 @@
 Change Log
 ----------
 
+1.0.1
+~~~~~
+
+Released on December 7, 2017
+
+Breaking changes:
+
+* Drop support for Python 2.6. (#330) (Thank you, Hugo, Will Kahn-Greene!)
+* Remove ``utils/spider.py`` (#353) (Thank you, Jon Dufresne!)
+
+Features:
+
+* Improve documentation. (#300, #307) (Thank you, Jon Dufresne, Tom Most,
+  Will Kahn-Greene!)
+* Add iframe seamless boolean attribute. (Thank you, Ritwik Gupta!)
+* Add itemscope as a boolean attribute. (#194) (Thank you, Jonathan Vanasco!)
+* Support Python 3.6. (#333) (Thank you, Jon Dufresne!)
+* Add CI support for Windows using AppVeyor. (Thank you, John Vandenberg!)
+* Improve testing and CI and add code coverage. (#323, #334) (Thank you, Jon
+  Dufresne, John Vandenberg, Geoffrey Sneddon, Will Kahn-Greene!)
+* Semver-compliant version number.
+
+Bug fixes:
+
+* Add support for setuptools < 18.5 to support environment markers. (Thank you,
+  John Vandenberg!)
+* Add explicit dependency for six >= 1.9. (Thank you, Eric Amorde!)
+* Fix regexes to work with Python 3.7 regex adjustments. (#318, #379) (Thank
+  you, Benedikt Morbach, Ville Skyttä, Mark Vasilkov!)
+* Fix alphabeticalattributes filter namespace bug. (#324) (Thank you, Will
+  Kahn-Greene!)
+* Include license file in generated wheel package. (#350) (Thank you, Jon
+  Dufresne!)
+* Fix annotation-xml typo. (#339) (Thank you, Will Kahn-Greene!)
+* Allow uppercase hex characters in CSS colour check. (#377) (Thank you,
+  Komal Dembla, Hugo!)
+
+
+1.0
+~~~
+
+Released and unreleased on December 7, 2017. Badly packaged release.
+
+
 0.999999999/1.0b10
 ~~~~~~~~~~~~~~~~~~
 
@@ -25,7 +69,7 @@
 
 * Cease supporting DATrie under PyPy.
 
-* **Remove ``PullDOM`` support, as this hasn't ever been properly
+* **Remove PullDOM support, as this hasn't ever been properly
   tested, doesn't entirely work, and as far as I can tell is
   completely unused by anyone.**
 
@@ -63,7 +107,7 @@
   to clarify their status as public.**
 
 * **Get rid of the sanitizer package. Merge sanitizer.sanitize into the
-  sanitizer.htmlsanitizer module and move that to saniziter. This means
+  sanitizer.htmlsanitizer module and move that to sanitizer. This means
   anyone who used sanitizer.sanitize or sanitizer.HTMLSanitizer needs no
   code changes.**
 
diff -Nru html5lib-0.999999999/debian/changelog html5lib-1.0.1/debian/changelog
--- html5lib-0.999999999/debian/changelog	2016-11-16 14:34:19.000000000 +0000
+++ html5lib-1.0.1/debian/changelog	2018-07-11 08:30:04.000000000 +0000
@@ -1,3 +1,16 @@
+html5lib (1.0.1-1) unstable; urgency=medium
+
+  [ Ondřej Nový ]
+  * d/control: Set Vcs-* to salsa.debian.org
+  * d/changelog: Remove trailing whitespaces
+  * d/control: Remove ancient X-Python-Version field
+  * d/control: Remove ancient X-Python3-Version field
+
+  [ Alexander GQ Gerasiov ]
+  * New upstream release (Closes: #895816).
+
+ -- Alexander GQ Gerasiov <gq@debian.org>  Wed, 11 Jul 2018 11:30:04 +0300
+
 html5lib (0.999999999-1) unstable; urgency=medium
 
   [ Ondřej Nový ]
@@ -95,16 +108,16 @@
   * Drop patch system and debian/README.source
   * Remove debian/examples - not shipped by upstream anymore.
   * Bump Standards-Version to 3.9.3.
-  * Do not install tests folder. 
+  * Do not install tests folder.
 
  -- Bernd Zeimetz <bzed@debian.org>  Wed, 09 May 2012 22:28:28 +0200
 
 html5lib (0.90-1) unstable; urgency=low
 
-  * New upstream version. 
-  * Updating patch to apply at the new version. 
+  * New upstream version.
+  * Updating patch to apply at the new version.
   * Check for the tests directory before running tests.
-    They're not always included in the source.... 
+    They're not always included in the source....
 
  -- Bernd Zeimetz <bzed@debian.org>  Sun, 24 Jan 2010 01:28:50 +0100
 
@@ -134,7 +147,7 @@
   [ Bernd Zeimetz ]
   * debian/README.source: Add file
   * debian/control: Bump Standards-Version to 3.8.2.
-  * debian/copyright: Updating debian packaging copyright. 
+  * debian/copyright: Updating debian packaging copyright.
 
  -- Bernd Zeimetz <bzed@debian.org>  Wed, 05 Aug 2009 22:12:43 +0200
 
diff -Nru html5lib-0.999999999/debian/control html5lib-1.0.1/debian/control
--- html5lib-0.999999999/debian/control	2016-11-16 14:34:19.000000000 +0000
+++ html5lib-1.0.1/debian/control	2018-07-11 08:21:52.000000000 +0000
@@ -25,12 +25,10 @@
                python3-setuptools,
                python3-six,
                python3-webencodings,
-Vcs-Git: https://anonscm.debian.org/git/python-modules/packages/html5lib.git
-Vcs-Browser: https://anonscm.debian.org/cgit/python-modules/packages/html5lib.git
+Vcs-Git: https://salsa.debian.org/python-team/modules/html5lib.git
+Vcs-Browser: https://salsa.debian.org/python-team/modules/html5lib
 Homepage: https://github.com/html5lib/html5lib-python
 Standards-Version: 3.9.8
-X-Python-Version: >= 2.6
-X-Python3-Version: >= 3.2
 
 Package: python-html5lib
 Architecture: all
diff -Nru html5lib-0.999999999/debian/.git-dpm html5lib-1.0.1/debian/.git-dpm
--- html5lib-0.999999999/debian/.git-dpm	2016-11-16 14:34:19.000000000 +0000
+++ html5lib-1.0.1/debian/.git-dpm	2018-07-11 08:21:52.000000000 +0000
@@ -1,11 +1,11 @@
 # see git-dpm(1) from git-dpm package
-a7d4397597d76c71dd9d232522bad124f0dd68a3
-a7d4397597d76c71dd9d232522bad124f0dd68a3
-a7d4397597d76c71dd9d232522bad124f0dd68a3
-a7d4397597d76c71dd9d232522bad124f0dd68a3
-html5lib_0.999999999.orig.tar.gz
-814e7ab8d865c3c0ba96a13fe383c06735329c36
-245488
+18b20c1acbaf76d21f006ff2f6b9960e4745a764
+18b20c1acbaf76d21f006ff2f6b9960e4745a764
+18b20c1acbaf76d21f006ff2f6b9960e4745a764
+18b20c1acbaf76d21f006ff2f6b9960e4745a764
+html5lib_1.0.1.orig.tar.gz
+5e1a2c7e18de7d1d0883e223f1733dc6dc796ee2
+252959
 debianTag="debian/%e%v"
 patchedTag="patched/%e%v"
 upstreamTag="upstream/%e%u"
diff -Nru html5lib-0.999999999/html5lib/constants.py html5lib-1.0.1/html5lib/constants.py
--- html5lib-0.999999999/html5lib/constants.py	2016-05-22 02:05:51.000000000 +0000
+++ html5lib-1.0.1/html5lib/constants.py	2017-12-07 12:25:26.000000000 +0000
@@ -423,7 +423,7 @@
 ])
 
 htmlIntegrationPointElements = frozenset([
-    (namespaces["mathml"], "annotaion-xml"),
+    (namespaces["mathml"], "annotation-xml"),
     (namespaces["svg"], "foreignObject"),
     (namespaces["svg"], "desc"),
     (namespaces["svg"], "title")
@@ -588,7 +588,7 @@
 ])
 
 booleanAttributes = {
-    "": frozenset(["irrelevant"]),
+    "": frozenset(["irrelevant", "itemscope"]),
     "style": frozenset(["scoped"]),
     "img": frozenset(["ismap"]),
     "audio": frozenset(["autoplay", "controls"]),
@@ -606,6 +606,7 @@
     "input": frozenset(["disabled", "readonly", "required", "autofocus", "checked", "ismap"]),
     "select": frozenset(["disabled", "readonly", "autofocus", "multiple"]),
     "output": frozenset(["disabled", "readonly"]),
+    "iframe": frozenset(["seamless"]),
 }
 
 # entitiesWindows1252 has to be _ordered_ and needs to have an index. It
@@ -2938,8 +2939,9 @@
 
 
 class DataLossWarning(UserWarning):
+    """Raised when the current tree is unable to represent the input data"""
     pass
 
 
-class ReparseException(Exception):
+class _ReparseException(Exception):
     pass
diff -Nru html5lib-0.999999999/html5lib/filters/alphabeticalattributes.py html5lib-1.0.1/html5lib/filters/alphabeticalattributes.py
--- html5lib-0.999999999/html5lib/filters/alphabeticalattributes.py	2016-07-14 19:07:32.000000000 +0000
+++ html5lib-1.0.1/html5lib/filters/alphabeticalattributes.py	2017-12-07 12:25:26.000000000 +0000
@@ -2,19 +2,28 @@
 
 from . import base
 
-try:
-    from collections import OrderedDict
-except ImportError:
-    from ordereddict import OrderedDict
+from collections import OrderedDict
+
+
+def _attr_key(attr):
+    """Return an appropriate key for an attribute for sorting
+
+    Attributes have a namespace that can be either ``None`` or a string. We
+    can't compare the two because they're different types, so we convert
+    ``None`` to an empty string first.
+
+    """
+    return (attr[0][0] or ''), attr[0][1]
 
 
 class Filter(base.Filter):
+    """Alphabetizes attributes for elements"""
     def __iter__(self):
         for token in base.Filter.__iter__(self):
             if token["type"] in ("StartTag", "EmptyTag"):
                 attrs = OrderedDict()
                 for name, value in sorted(token["data"].items(),
-                                          key=lambda x: x[0]):
+                                          key=_attr_key):
                     attrs[name] = value
                 token["data"] = attrs
             yield token
diff -Nru html5lib-0.999999999/html5lib/filters/inject_meta_charset.py html5lib-1.0.1/html5lib/filters/inject_meta_charset.py
--- html5lib-0.999999999/html5lib/filters/inject_meta_charset.py	2016-07-14 19:07:32.000000000 +0000
+++ html5lib-1.0.1/html5lib/filters/inject_meta_charset.py	2017-12-07 12:25:26.000000000 +0000
@@ -4,7 +4,15 @@
 
 
 class Filter(base.Filter):
+    """Injects ``<meta charset=ENCODING>`` tag into head of document"""
     def __init__(self, source, encoding):
+        """Creates a Filter
+
+        :arg source: the source token stream
+
+        :arg encoding: the encoding to set
+
+        """
         base.Filter.__init__(self, source)
         self.encoding = encoding
 
diff -Nru html5lib-0.999999999/html5lib/filters/lint.py html5lib-1.0.1/html5lib/filters/lint.py
--- html5lib-0.999999999/html5lib/filters/lint.py	2016-07-14 19:07:32.000000000 +0000
+++ html5lib-1.0.1/html5lib/filters/lint.py	2017-12-07 12:25:26.000000000 +0000
@@ -10,7 +10,19 @@
 
 
 class Filter(base.Filter):
+    """Lints the token stream for errors
+
+    If it finds any errors, it'll raise an ``AssertionError``.
+
+    """
     def __init__(self, source, require_matching_tags=True):
+        """Creates a Filter
+
+        :arg source: the source token stream
+
+        :arg require_matching_tags: whether or not to require matching tags
+
+        """
         super(Filter, self).__init__(source)
         self.require_matching_tags = require_matching_tags
 
diff -Nru html5lib-0.999999999/html5lib/filters/optionaltags.py html5lib-1.0.1/html5lib/filters/optionaltags.py
--- html5lib-0.999999999/html5lib/filters/optionaltags.py	2016-07-14 19:07:32.000000000 +0000
+++ html5lib-1.0.1/html5lib/filters/optionaltags.py	2017-12-07 12:25:26.000000000 +0000
@@ -4,6 +4,7 @@
 
 
 class Filter(base.Filter):
+    """Removes optional tags from the token stream"""
     def slider(self):
         previous1 = previous2 = None
         for token in self.source:
diff -Nru html5lib-0.999999999/html5lib/filters/sanitizer.py html5lib-1.0.1/html5lib/filters/sanitizer.py
--- html5lib-0.999999999/html5lib/filters/sanitizer.py	2016-07-14 19:07:32.000000000 +0000
+++ html5lib-1.0.1/html5lib/filters/sanitizer.py	2017-12-07 12:25:26.000000000 +0000
@@ -705,7 +705,7 @@
 
 
 class Filter(base.Filter):
-    """ sanitization of XHTML+MathML+SVG and of inline style attributes."""
+    """Sanitizes token stream of XHTML+MathML+SVG and of inline style attributes"""
     def __init__(self,
                  source,
                  allowed_elements=allowed_elements,
@@ -718,6 +718,37 @@
                  attr_val_is_uri=attr_val_is_uri,
                  svg_attr_val_allows_ref=svg_attr_val_allows_ref,
                  svg_allow_local_href=svg_allow_local_href):
+        """Creates a Filter
+
+        :arg allowed_elements: set of elements to allow--everything else will
+            be escaped
+
+        :arg allowed_attributes: set of attributes to allow in
+            elements--everything else will be stripped
+
+        :arg allowed_css_properties: set of CSS properties to allow--everything
+            else will be stripped
+
+        :arg allowed_css_keywords: set of CSS keywords to allow--everything
+            else will be stripped
+
+        :arg allowed_svg_properties: set of SVG properties to allow--everything
+            else will be removed
+
+        :arg allowed_protocols: set of allowed protocols for URIs
+
+        :arg allowed_content_types: set of allowed content types for ``data`` URIs.
+
+        :arg attr_val_is_uri: set of attributes that have URI values--values
+            that have a scheme not listed in ``allowed_protocols`` are removed
+
+        :arg svg_attr_val_allows_ref: set of SVG attributes that can have
+            references
+
+        :arg svg_allow_local_href: set of SVG elements that can have local
+            hrefs--non-local hrefs on these elements are removed
+
+        """
         super(Filter, self).__init__(source)
         self.allowed_elements = allowed_elements
         self.allowed_attributes = allowed_attributes
@@ -737,11 +768,11 @@
                 yield token
 
     # Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and
-    # stripping out all # attributes not in ALLOWED_ATTRIBUTES. Style
-    # attributes are parsed, and a restricted set, # specified by
-    # ALLOWED_CSS_PROPERTIES and ALLOWED_CSS_KEYWORDS, are allowed through.
-    # attributes in ATTR_VAL_IS_URI are scanned, and only URI schemes specified
-    # in ALLOWED_PROTOCOLS are allowed.
+    # stripping out all attributes not in ALLOWED_ATTRIBUTES. Style attributes
+    # are parsed, and a restricted set, specified by ALLOWED_CSS_PROPERTIES and
+    # ALLOWED_CSS_KEYWORDS, are allowed through. Attributes in ATTR_VAL_IS_URI
+    # are scanned, and only URI schemes specified in ALLOWED_PROTOCOLS are
+    # allowed.
     #
     #   sanitize_html('<script> do_nasty_stuff() </script>')
     #    => &lt;script> do_nasty_stuff() &lt;/script>
@@ -782,7 +813,7 @@
                 # characters, nor why we call unescape. I just know it's always been here.
                 # Should you be worried by this comment in a sanitizer? Yes. On the other hand, all
                 # this will do is remove *more* than it otherwise would.
-                val_unescaped = re.sub("[`\x00-\x20\x7f-\xa0\s]+", '',
+                val_unescaped = re.sub("[`\x00-\x20\x7f-\xa0\\s]+", '',
                                        unescape(attrs[attr])).lower()
                 # remove replacement characters from unescaped characters
                 val_unescaped = val_unescaped.replace("\ufffd", "")
@@ -807,7 +838,7 @@
                                          ' ',
                                          unescape(attrs[attr]))
             if (token["name"] in self.svg_allow_local_href and
-                (namespaces['xlink'], 'href') in attrs and re.search('^\s*[^#\s].*',
+                (namespaces['xlink'], 'href') in attrs and re.search(r'^\s*[^#\s].*',
                                                                      attrs[(namespaces['xlink'], 'href')])):
                 del attrs[(namespaces['xlink'], 'href')]
             if (None, 'style') in attrs:
@@ -837,16 +868,16 @@
 
     def sanitize_css(self, style):
         # disallow urls
-        style = re.compile('url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style)
+        style = re.compile(r'url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style)
 
         # gauntlet
-        if not re.match("""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style):
+        if not re.match(r"""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style):
             return ''
-        if not re.match("^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
+        if not re.match(r"^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
             return ''
 
         clean = []
-        for prop, value in re.findall("([-\w]+)\s*:\s*([^:;]*)", style):
+        for prop, value in re.findall(r"([-\w]+)\s*:\s*([^:;]*)", style):
             if not value:
                 continue
             if prop.lower() in self.allowed_css_properties:
@@ -855,7 +886,7 @@
                                                 'padding']:
                 for keyword in value.split():
                     if keyword not in self.allowed_css_keywords and \
-                            not re.match("^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword):  # noqa
+                            not re.match(r"^(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword):  # noqa
                         break
                 else:
                     clean.append(prop + ': ' + value + ';')
diff -Nru html5lib-0.999999999/html5lib/filters/whitespace.py html5lib-1.0.1/html5lib/filters/whitespace.py
--- html5lib-0.999999999/html5lib/filters/whitespace.py	2016-07-14 19:07:32.000000000 +0000
+++ html5lib-1.0.1/html5lib/filters/whitespace.py	2017-12-07 12:25:26.000000000 +0000
@@ -10,7 +10,7 @@
 
 
 class Filter(base.Filter):
-
+    """Collapses whitespace except in pre, textarea, and script elements"""
     spacePreserveElements = frozenset(["pre", "textarea"] + list(rcdataElements))
 
     def __iter__(self):
diff -Nru html5lib-0.999999999/html5lib/html5parser.py html5lib-1.0.1/html5lib/html5parser.py
--- html5lib-0.999999999/html5lib/html5parser.py	2016-07-15 01:34:57.000000000 +0000
+++ html5lib-1.0.1/html5lib/html5parser.py	2017-12-07 12:25:26.000000000 +0000
@@ -1,12 +1,8 @@
 from __future__ import absolute_import, division, unicode_literals
-from six import with_metaclass, viewkeys, PY3
+from six import with_metaclass, viewkeys
 
 import types
-
-try:
-    from collections import OrderedDict
-except ImportError:
-    from ordereddict import OrderedDict
+from collections import OrderedDict
 
 from . import _inputstream
 from . import _tokenizer
@@ -24,18 +20,53 @@
     adjustForeignAttributes as adjustForeignAttributesMap,
     adjustMathMLAttributes, adjustSVGAttributes,
     E,
-    ReparseException
+    _ReparseException
 )
 
 
 def parse(doc, treebuilder="etree", namespaceHTMLElements=True, **kwargs):
-    """Parse a string or file-like object into a tree"""
+    """Parse an HTML document as a string or file-like object into a tree
+
+    :arg doc: the document to parse as a string or file-like object
+
+    :arg treebuilder: the treebuilder to use when parsing
+
+    :arg namespaceHTMLElements: whether or not to namespace HTML elements
+
+    :returns: parsed tree
+
+    Example:
+
+    >>> from html5lib.html5parser import parse
+    >>> parse('<html><body><p>This is a doc</p></body></html>')
+    <Element u'{http://www.w3.org/1999/xhtml}html' at 0x7feac4909db0>
+
+    """
     tb = treebuilders.getTreeBuilder(treebuilder)
     p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements)
     return p.parse(doc, **kwargs)
 
 
 def parseFragment(doc, container="div", treebuilder="etree", namespaceHTMLElements=True, **kwargs):
+    """Parse an HTML fragment as a string or file-like object into a tree
+
+    :arg doc: the fragment to parse as a string or file-like object
+
+    :arg container: the container context to parse the fragment in
+
+    :arg treebuilder: the treebuilder to use when parsing
+
+    :arg namespaceHTMLElements: whether or not to namespace HTML elements
+
+    :returns: parsed tree
+
+    Example:
+
+    >>> from html5lib.html5parser import parseFragment
+    >>> parseFragment('<b>this is a fragment</b>')
+    <Element u'DOCUMENT_FRAGMENT' at 0x7feac484b090>
+
+    """
     tb = treebuilders.getTreeBuilder(treebuilder)
     p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements)
     return p.parseFragment(doc, container=container, **kwargs)
@@ -54,16 +85,30 @@
 
 
 class HTMLParser(object):
-    """HTML parser. Generates a tree structure from a stream of (possibly
-        malformed) HTML"""
+    """HTML parser
+
+    Generates a tree structure from a stream of (possibly malformed) HTML.
+
+    """
 
     def __init__(self, tree=None, strict=False, namespaceHTMLElements=True, debug=False):
         """
-        strict - raise an exception when a parse error is encountered
+        :arg tree: a treebuilder class controlling the type of tree that will be
+            returned. Built in treebuilders can be accessed through
+            html5lib.treebuilders.getTreeBuilder(treeType)
+
+        :arg strict: raise an exception when a parse error is encountered
+
+        :arg namespaceHTMLElements: whether or not to namespace HTML elements
+
+        :arg debug: whether or not to enable debug mode which logs things
+
+        Example:
+
+        >>> from html5lib.html5parser import HTMLParser
+        >>> parser = HTMLParser()                     # generates parser with etree builder
+        >>> parser = HTMLParser('lxml', strict=True)  # generates parser with lxml builder which is strict
 
-        tree - a treebuilder class controlling the type of tree that will be
-        returned. Built in treebuilders can be accessed through
-        html5lib.treebuilders.getTreeBuilder(treeType)
         """
 
         # Raise an exception on the first error encountered
@@ -87,7 +132,7 @@
 
         try:
             self.mainLoop()
-        except ReparseException:
+        except _ReparseException:
             self.reset()
             self.mainLoop()
 
@@ -127,9 +172,8 @@
 
     @property
     def documentEncoding(self):
-        """The name of the character encoding
-        that was used to decode the input stream,
-        or :obj:`None` if that is not determined yet.
+        """Name of the character encoding that was used to decode the input stream, or
+        :obj:`None` if that is not determined yet
 
         """
         if not hasattr(self, 'tokenizer'):
@@ -223,14 +267,24 @@
     def parse(self, stream, *args, **kwargs):
         """Parse a HTML document into a well-formed tree
 
-        stream - a filelike object or string containing the HTML to be parsed
+        :arg stream: a file-like object or string containing the HTML to be parsed
+
+            The optional encoding parameter must be a string that indicates
+            the encoding.  If specified, that encoding will be used,
+            regardless of any BOM or later declaration (such as in a meta
+            element).
+
+        :arg scripting: treat noscript elements as if JavaScript was turned on
 
-        The optional encoding parameter must be a string that indicates
-        the encoding.  If specified, that encoding will be used,
-        regardless of any BOM or later declaration (such as in a meta
-        element)
+        :returns: parsed tree
+
+        Example:
+
+        >>> from html5lib.html5parser import HTMLParser
+        >>> parser = HTMLParser()
+        >>> parser.parse('<html><body><p>This is a doc</p></body></html>')
+        <Element u'{http://www.w3.org/1999/xhtml}html' at 0x7feac4909db0>
 
-        scripting - treat noscript elements as if javascript was turned on
         """
         self._parse(stream, False, None, *args, **kwargs)
         return self.tree.getDocument()
@@ -238,17 +292,27 @@
     def parseFragment(self, stream, *args, **kwargs):
         """Parse a HTML fragment into a well-formed tree fragment
 
-        container - name of the element we're setting the innerHTML property
-        if set to None, default to 'div'
+        :arg container: name of the element we're setting the innerHTML
+            property on; if set to None, defaults to 'div'
+
+        :arg stream: a file-like object or string containing the HTML to be parsed
+
+            The optional encoding parameter must be a string that indicates
+            the encoding.  If specified, that encoding will be used,
+            regardless of any BOM or later declaration (such as in a meta
+            element)
 
-        stream - a filelike object or string containing the HTML to be parsed
+        :arg scripting: treat noscript elements as if JavaScript was turned on
 
-        The optional encoding parameter must be a string that indicates
-        the encoding.  If specified, that encoding will be used,
-        regardless of any BOM or later declaration (such as in a meta
-        element)
+        :returns: parsed tree
+
+        Example:
+
+        >>> from html5lib.html5parser import HTMLParser
+        >>> parser = HTMLParser()
+        >>> parser.parseFragment('<b>this is a fragment</b>')
+        <Element u'DOCUMENT_FRAGMENT' at 0x7feac484b090>
 
-        scripting - treat noscript elements as if javascript was turned on
         """
         self._parse(stream, True, *args, **kwargs)
         return self.tree.getFragment()
@@ -262,8 +326,7 @@
             raise ParseError(E[errorcode] % datavars)
 
     def normalizeToken(self, token):
-        """ HTML5 specific normalizations to the token stream """
-
+        # HTML5 specific normalizations to the token stream
         if token["type"] == tokenTypes["StartTag"]:
             raw = token["data"]
             token["data"] = OrderedDict(raw)
@@ -331,9 +394,7 @@
         self.phase = new_phase
 
     def parseRCDataRawtext(self, token, contentType):
-        """Generic RCDATA/RAWTEXT Parsing algorithm
-        contentType - RCDATA or RAWTEXT
-        """
+        # Generic RCDATA/RAWTEXT Parsing algorithm
         assert contentType in ("RAWTEXT", "RCDATA")
 
         self.tree.insertElement(token)
@@ -2711,10 +2772,7 @@
 
 
 def adjust_attributes(token, replacements):
-    if PY3 or _utils.PY27:
-        needs_adjustment = viewkeys(token['data']) & viewkeys(replacements)
-    else:
-        needs_adjustment = frozenset(token['data']) & frozenset(replacements)
+    needs_adjustment = viewkeys(token['data']) & viewkeys(replacements)
     if needs_adjustment:
         token['data'] = OrderedDict((replacements.get(k, k), v)
                                     for k, v in token['data'].items())
diff -Nru html5lib-0.999999999/html5lib/_ihatexml.py html5lib-1.0.1/html5lib/_ihatexml.py
--- html5lib-0.999999999/html5lib/_ihatexml.py	2016-07-14 19:07:32.000000000 +0000
+++ html5lib-1.0.1/html5lib/_ihatexml.py	2017-12-07 12:25:26.000000000 +0000
@@ -180,7 +180,7 @@
 nonXmlNameFirstBMPRegexp = re.compile('[\x00-@\\[-\\^`\\{-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u0385\u0387\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u0640\u064b-\u0670\u06b8-\u06b9\u06bf\u06cf\u06d4\u06d6-\u06e4\u06e7-\u0904\u093a-\u093c\u093e-\u0957\u0962-\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09db\u09de\u09e2-\u09ef\u09f2-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a58\u0a5d\u0a5f-\u0a71\u0a75-\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abc\u0abe-\u0adf\u0ae1-\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3c\u0b3e-\u0b5b\u0b5e\u0b62-\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c5f\u0c62-\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cdd\u0cdf\u0ce2-\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d5f\u0d62-\u0e00\u0e2f\u0e31\u0e34-\u0e3f\u0e46-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eb1\u0eb4-\u0ebc\u0ebe-\u0ebf\u0ec5-\u0f3f\u0f48\u0f6a-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3006\u3008-\u3020\u302a-\u
3040\u3095-\u30a0\u30fb-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]')  # noqa
 
 # Simpler things
-nonPubidCharRegexp = re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\-\'()+,./:=?;!*#@$_%]")
+nonPubidCharRegexp = re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\\-'()+,./:=?;!*#@$_%]")
 
 
 class InfosetFilter(object):
diff -Nru html5lib-0.999999999/html5lib/__init__.py html5lib-1.0.1/html5lib/__init__.py
--- html5lib-0.999999999/html5lib/__init__.py	2016-07-15 01:37:45.000000000 +0000
+++ html5lib-1.0.1/html5lib/__init__.py	2017-12-07 14:07:38.000000000 +0000
@@ -1,14 +1,23 @@
 """
-HTML parsing library based on the WHATWG "HTML5"
-specification. The parser is designed to be compatible with existing
-HTML found in the wild and implements well-defined error recovery that
+HTML parsing library based on the `WHATWG HTML specification
+<https://whatwg.org/html>`_. The parser is designed to be compatible with
+existing HTML found in the wild and implements well-defined error recovery that
 is largely compatible with modern desktop web browsers.
 
-Example usage:
+Example usage::
 
-import html5lib
-f = open("my_document.html")
-tree = html5lib.parse(f)
+    import html5lib
+    with open("my_document.html", "rb") as f:
+        tree = html5lib.parse(f)
+
+For convenience, this module re-exports the following names:
+
+* :func:`~.html5parser.parse`
+* :func:`~.html5parser.parseFragment`
+* :class:`~.html5parser.HTMLParser`
+* :func:`~.treebuilders.getTreeBuilder`
+* :func:`~.treewalkers.getTreeWalker`
+* :func:`~.serializer.serialize`
 """
 
 from __future__ import absolute_import, division, unicode_literals
@@ -22,4 +31,5 @@
            "getTreeWalker", "serialize"]
 
 # this has to be at the top level, see how setup.py parses this
-__version__ = "0.999999999"
+#: Distribution version number.
+__version__ = "1.0.1"
diff -Nru html5lib-0.999999999/html5lib/_inputstream.py html5lib-1.0.1/html5lib/_inputstream.py
--- html5lib-0.999999999/html5lib/_inputstream.py	2016-07-14 19:07:32.000000000 +0000
+++ html5lib-1.0.1/html5lib/_inputstream.py	2017-12-07 12:25:26.000000000 +0000
@@ -9,7 +9,7 @@
 import webencodings
 
 from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase
-from .constants import ReparseException
+from .constants import _ReparseException
 from . import _utils
 
 from io import StringIO
@@ -48,7 +48,7 @@
                                   0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF,
                                   0x10FFFE, 0x10FFFF])
 
-ascii_punctuation_re = re.compile("[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005B-\u0060\u007B-\u007E]")
+ascii_punctuation_re = re.compile("[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005C\u005B-\u0060\u007B-\u007E]")
 
 # Cache for charsUntil()
 charsUntilRegEx = {}
@@ -530,7 +530,7 @@
             self.rawStream.seek(0)
             self.charEncoding = (newEncoding, "certain")
             self.reset()
-            raise ReparseException("Encoding changed from %s to %s" % (self.charEncoding[0], newEncoding))
+            raise _ReparseException("Encoding changed from %s to %s" % (self.charEncoding[0], newEncoding))
 
     def detectBOM(self):
         """Attempts to detect at BOM at the start of the stream. If
diff -Nru html5lib-0.999999999/html5lib/serializer.py html5lib-1.0.1/html5lib/serializer.py
--- html5lib-0.999999999/html5lib/serializer.py	2016-07-14 19:07:32.000000000 +0000
+++ html5lib-1.0.1/html5lib/serializer.py	2017-12-07 12:25:26.000000000 +0000
@@ -68,10 +68,33 @@
     else:
         return xmlcharrefreplace_errors(exc)
 
+
 register_error("htmlentityreplace", htmlentityreplace_errors)
 
 
 def serialize(input, tree="etree", encoding=None, **serializer_opts):
+    """Serializes the input token stream using the specified treewalker
+
+    :arg input: the token stream to serialize
+
+    :arg tree: the treewalker to use
+
+    :arg encoding: the encoding to use
+
+    :arg serializer_opts: any options to pass to the
+        :py:class:`html5lib.serializer.HTMLSerializer` that gets created
+
+    :returns: the tree serialized as a string
+
+    Example:
+
+    >>> from html5lib.html5parser import parse
+    >>> from html5lib.serializer import serialize
+    >>> token_stream = parse('<html><body><p>Hi!</p></body></html>')
+    >>> serialize(token_stream, omit_optional_tags=False)
+    '<html><head></head><body><p>Hi!</p></body></html>'
+
+    """
     # XXX: Should we cache this?
     walker = treewalkers.getTreeWalker(tree)
     s = HTMLSerializer(**serializer_opts)
@@ -110,50 +133,83 @@
                "strip_whitespace", "sanitize")
 
     def __init__(self, **kwargs):
-        """Initialize HTMLSerializer.
+        """Initialize HTMLSerializer
+
+        :arg inject_meta_charset: Whether or not to inject the meta charset.
+
+            Defaults to ``True``.
+
+        :arg quote_attr_values: Whether to quote attribute values that don't
+            require quoting per legacy browser behavior (``"legacy"``), when
+            required by the standard (``"spec"``), or always (``"always"``).
+
+            Defaults to ``"legacy"``.
+
+        :arg quote_char: Use given quote character for attribute quoting.
+
+            Defaults to ``"`` which will use double quotes unless attribute
+            value contains a double quote, in which case single quotes are
+            used.
+
+        :arg escape_lt_in_attrs: Whether or not to escape ``<`` in attribute
+            values.
+
+            Defaults to ``False``.
+
+        :arg escape_rcdata: Whether to escape characters that need to be
+            escaped within normal elements within rcdata elements such as
+            style.
+
+            Defaults to ``False``.
+
+        :arg resolve_entities: Whether to resolve named character entities that
+            appear in the source tree. The XML predefined entities &lt; &gt;
+            &amp; &quot; &apos; are unaffected by this setting.
+
+            Defaults to ``True``.
+
+        :arg strip_whitespace: Whether to remove semantically meaningless
+            whitespace. (This compresses all whitespace to a single space
+            except within ``pre``.)
 
-        Keyword options (default given first unless specified) include:
+            Defaults to ``False``.
 
-        inject_meta_charset=True|False
-          Whether it insert a meta element to define the character set of the
-          document.
-        quote_attr_values="legacy"|"spec"|"always"
-          Whether to quote attribute values that don't require quoting
-          per legacy browser behaviour, when required by the standard, or always.
-        quote_char=u'"'|u"'"
-          Use given quote character for attribute quoting. Default is to
-          use double quote unless attribute value contains a double quote,
-          in which case single quotes are used instead.
-        escape_lt_in_attrs=False|True
-          Whether to escape < in attribute values.
-        escape_rcdata=False|True
-          Whether to escape characters that need to be escaped within normal
-          elements within rcdata elements such as style.
-        resolve_entities=True|False
-          Whether to resolve named character entities that appear in the
-          source tree. The XML predefined entities &lt; &gt; &amp; &quot; &apos;
-          are unaffected by this setting.
-        strip_whitespace=False|True
-          Whether to remove semantically meaningless whitespace. (This
-          compresses all whitespace to a single space except within pre.)
-        minimize_boolean_attributes=True|False
-          Shortens boolean attributes to give just the attribute value,
-          for example <input disabled="disabled"> becomes <input disabled>.
-        use_trailing_solidus=False|True
-          Includes a close-tag slash at the end of the start tag of void
-          elements (empty elements whose end tag is forbidden). E.g. <hr/>.
-        space_before_trailing_solidus=True|False
-          Places a space immediately before the closing slash in a tag
-          using a trailing solidus. E.g. <hr />. Requires use_trailing_solidus.
-        sanitize=False|True
-          Strip all unsafe or unknown constructs from output.
-          See `html5lib user documentation`_
-        omit_optional_tags=True|False
-          Omit start/end tags that are optional.
-        alphabetical_attributes=False|True
-          Reorder attributes to be in alphabetical order.
+        :arg minimize_boolean_attributes: Shortens boolean attributes to give
+            just the attribute value, for example::
+
+              <input disabled="disabled">
+
+            becomes::
+
+              <input disabled>
+
+            Defaults to ``True``.
+
+        :arg use_trailing_solidus: Includes a close-tag slash at the end of the
+            start tag of void elements (empty elements whose end tag is
+            forbidden). E.g. ``<hr/>``.
+
+            Defaults to ``False``.
+
+        :arg space_before_trailing_solidus: Places a space immediately before
+            the closing slash in a tag using a trailing solidus. E.g.
+            ``<hr />``. Requires ``use_trailing_solidus=True``.
+
+            Defaults to ``True``.
+
+        :arg sanitize: Strip all unsafe or unknown constructs from output.
+            See :py:class:`html5lib.filters.sanitizer.Filter`.
+
+            Defaults to ``False``.
+
+        :arg omit_optional_tags: Omit start/end tags that are optional.
+
+            Defaults to ``True``.
+
+        :arg alphabetical_attributes: Reorder attributes to be in alphabetical order.
+
+            Defaults to ``False``.
 
-        .. _html5lib user documentation: http://code.google.com/p/html5lib/wiki/UserDocumentation
         """
         unexpected_args = frozenset(kwargs) - frozenset(self.options)
         if len(unexpected_args) > 0:
@@ -317,6 +373,25 @@
                 self.serializeError(token["data"])
 
     def render(self, treewalker, encoding=None):
+        """Serializes the stream from the treewalker into a string
+
+        :arg treewalker: the treewalker to serialize
+
+        :arg encoding: the string encoding to use
+
+        :returns: the serialized tree
+
+        Example:
+
+        >>> from html5lib import parse, getTreeWalker
+        >>> from html5lib.serializer import HTMLSerializer
+        >>> token_stream = parse('<html><body>Hi!</body></html>')
+        >>> walker = getTreeWalker('etree')
+        >>> serializer = HTMLSerializer(omit_optional_tags=False)
+        >>> serializer.render(walker(token_stream))
+        '<html><head></head><body>Hi!</body></html>'
+
+        """
         if encoding:
             return b"".join(list(self.serialize(treewalker, encoding)))
         else:
diff -Nru html5lib-0.999999999/html5lib/tests/conftest.py html5lib-1.0.1/html5lib/tests/conftest.py
--- html5lib-0.999999999/html5lib/tests/conftest.py	2016-07-11 23:24:55.000000000 +0000
+++ html5lib-1.0.1/html5lib/tests/conftest.py	2017-12-07 12:25:26.000000000 +0000
@@ -1,4 +1,6 @@
+from __future__ import print_function
 import os.path
+import sys
 
 import pkg_resources
 import pytest
@@ -15,6 +17,26 @@
 _sanitizer_testdata = os.path.join(_dir, "sanitizer-testdata")
 
 
+def fail_if_missing_pytest_expect():
+    """Throws an exception halting pytest if pytest-expect isn't working"""
+    try:
+        from pytest_expect import expect  # noqa
+    except ImportError:
+        header = '*' * 78
+        print(
+            '\n' +
+            header + '\n' +
+            'ERROR: Either pytest-expect or its dependency u-msgpack-python is not\n' +
+            'installed. Please install them both before running pytest.\n' +
+            header + '\n',
+            file=sys.stderr
+        )
+        raise
+
+
+fail_if_missing_pytest_expect()
+
+
 def pytest_configure(config):
     msgs = []
 
diff -Nru html5lib-0.999999999/html5lib/tests/test_alphabeticalattributes.py html5lib-1.0.1/html5lib/tests/test_alphabeticalattributes.py
--- html5lib-0.999999999/html5lib/tests/test_alphabeticalattributes.py	1970-01-01 00:00:00.000000000 +0000
+++ html5lib-1.0.1/html5lib/tests/test_alphabeticalattributes.py	2017-12-07 12:25:26.000000000 +0000
@@ -0,0 +1,78 @@
+from __future__ import absolute_import, division, unicode_literals
+
+from collections import OrderedDict
+
+import pytest
+
+import html5lib
+from html5lib.filters.alphabeticalattributes import Filter
+from html5lib.serializer import HTMLSerializer
+
+
+@pytest.mark.parametrize('msg, attrs, expected_attrs', [
+    (
+        'no attrs',
+        {},
+        {}
+    ),
+    (
+        'one attr',
+        {(None, 'alt'): 'image'},
+        OrderedDict([((None, 'alt'), 'image')])
+    ),
+    (
+        'multiple attrs',
+        {
+            (None, 'src'): 'foo',
+            (None, 'alt'): 'image',
+            (None, 'style'): 'border: 1px solid black;'
+        },
+        OrderedDict([
+            ((None, 'alt'), 'image'),
+            ((None, 'src'), 'foo'),
+            ((None, 'style'), 'border: 1px solid black;')
+        ])
+    ),
+])
+def test_alphabetizing(msg, attrs, expected_attrs):
+    tokens = [{'type': 'StartTag', 'name': 'img', 'data': attrs}]
+    output_tokens = list(Filter(tokens))
+
+    attrs = output_tokens[0]['data']
+    assert attrs == expected_attrs
+
+
+def test_with_different_namespaces():
+    tokens = [{
+        'type': 'StartTag',
+        'name': 'pattern',
+        'data': {
+            (None, 'id'): 'patt1',
+            ('http://www.w3.org/1999/xlink', 'href'): '#patt2'
+        }
+    }]
+    output_tokens = list(Filter(tokens))
+
+    attrs = output_tokens[0]['data']
+    assert attrs == OrderedDict([
+        ((None, 'id'), 'patt1'),
+        (('http://www.w3.org/1999/xlink', 'href'), '#patt2')
+    ])
+
+
+def test_with_serializer():
+    """Verify filter works in the context of everything else"""
+    parser = html5lib.HTMLParser()
+    dom = parser.parseFragment('<svg><pattern xlink:href="#patt2" id="patt1"></svg>')
+    walker = html5lib.getTreeWalker('etree')
+    ser = HTMLSerializer(
+        alphabetical_attributes=True,
+        quote_attr_values='always'
+    )
+
+    # FIXME(willkg): The "xlink" namespace gets dropped by the serializer. When
+    # that gets fixed, we can fix this expected result.
+    assert (
+        ser.render(walker(dom)) ==
+        '<svg><pattern id="patt1" href="#patt2"></pattern></svg>'
+    )
diff -Nru html5lib-0.999999999/html5lib/tests/testdata/.git html5lib-1.0.1/html5lib/tests/testdata/.git
--- html5lib-0.999999999/html5lib/tests/testdata/.git	2013-04-09 23:43:27.000000000 +0000
+++ html5lib-1.0.1/html5lib/tests/testdata/.git	2017-12-07 13:52:47.000000000 +0000
@@ -1 +1 @@
-gitdir: ../../../.git/modules/testdata
+gitdir: ../../../../html5lib-python/.git/worktrees/html5lib-python-b/modules/testdata
diff -Nru html5lib-0.999999999/html5lib/tests/test_sanitizer.py html5lib-1.0.1/html5lib/tests/test_sanitizer.py
--- html5lib-0.999999999/html5lib/tests/test_sanitizer.py	2016-07-12 01:44:10.000000000 +0000
+++ html5lib-1.0.1/html5lib/tests/test_sanitizer.py	2017-12-07 12:25:26.000000000 +0000
@@ -113,3 +113,15 @@
         yield (runSanitizerTest, "test_should_allow_uppercase_%s_uris" % protocol,
                "<img src=\"%s:%s\">foo</a>" % (protocol, rest_of_uri),
                """<img src="%s:%s">foo</a>""" % (protocol, rest_of_uri))
+
+
+def test_lowercase_color_codes_in_style():
+    sanitized = sanitize_html("<p style=\"border: 1px solid #a2a2a2;\"></p>")
+    expected = '<p style=\"border: 1px solid #a2a2a2;\"></p>'
+    assert expected == sanitized
+
+
+def test_uppercase_color_codes_in_style():
+    sanitized = sanitize_html("<p style=\"border: 1px solid #A2A2A2;\"></p>")
+    expected = '<p style=\"border: 1px solid #A2A2A2;\"></p>'
+    assert expected == sanitized
diff -Nru html5lib-0.999999999/html5lib/treeadapters/genshi.py html5lib-1.0.1/html5lib/treeadapters/genshi.py
--- html5lib-0.999999999/html5lib/treeadapters/genshi.py	2016-05-20 14:50:15.000000000 +0000
+++ html5lib-1.0.1/html5lib/treeadapters/genshi.py	2017-12-07 12:25:26.000000000 +0000
@@ -5,6 +5,13 @@
 
 
 def to_genshi(walker):
+    """Convert a tree to a genshi tree
+
+    :arg walker: the treewalker to use to walk the tree to convert it
+
+    :returns: generator of genshi nodes
+
+    """
     text = []
     for token in walker:
         type = token["type"]
diff -Nru html5lib-0.999999999/html5lib/treeadapters/__init__.py html5lib-1.0.1/html5lib/treeadapters/__init__.py
--- html5lib-0.999999999/html5lib/treeadapters/__init__.py	2016-05-22 01:09:33.000000000 +0000
+++ html5lib-1.0.1/html5lib/treeadapters/__init__.py	2017-12-07 12:25:26.000000000 +0000
@@ -1,3 +1,21 @@
+"""Tree adapters let you convert from one tree structure to another
+
+Example:
+
+.. code-block:: python
+
+   import html5lib
+   from html5lib.treeadapters import genshi
+
+   doc = '<html><body>Hi!</body></html>'
+   treebuilder = html5lib.getTreeBuilder('etree')
+   parser = html5lib.HTMLParser(tree=treebuilder)
+   tree = parser.parse(doc)
+   TreeWalker = html5lib.getTreeWalker('etree')
+
+   genshi_tree = genshi.to_genshi(TreeWalker(tree))
+
+"""
 from __future__ import absolute_import, division, unicode_literals
 
 from . import sax
diff -Nru html5lib-0.999999999/html5lib/treeadapters/sax.py html5lib-1.0.1/html5lib/treeadapters/sax.py
--- html5lib-0.999999999/html5lib/treeadapters/sax.py	2015-04-26 02:17:12.000000000 +0000
+++ html5lib-1.0.1/html5lib/treeadapters/sax.py	2017-12-07 12:25:26.000000000 +0000
@@ -11,7 +11,13 @@
 
 
 def to_sax(walker, handler):
-    """Call SAX-like content handler based on treewalker walker"""
+    """Call SAX-like content handler based on treewalker walker
+
+    :arg walker: the treewalker to use to walk the tree to convert it
+
+    :arg handler: SAX handler to use
+
+    """
     handler.startDocument()
     for prefix, namespace in prefix_mapping.items():
         handler.startPrefixMapping(prefix, namespace)
diff -Nru html5lib-0.999999999/html5lib/treebuilders/base.py html5lib-1.0.1/html5lib/treebuilders/base.py
--- html5lib-0.999999999/html5lib/treebuilders/base.py	2016-07-14 19:07:32.000000000 +0000
+++ html5lib-1.0.1/html5lib/treebuilders/base.py	2017-12-07 12:25:26.000000000 +0000
@@ -21,22 +21,25 @@
 
 
 class Node(object):
+    """Represents an item in the tree"""
     def __init__(self, name):
-        """Node representing an item in the tree.
-        name - The tag name associated with the node
-        parent - The parent of the current node (or None for the document node)
-        value - The value of the current node (applies to text nodes and
-        comments
-        attributes - a dict holding name, value pairs for attributes of the node
-        childNodes - a list of child nodes of the current node. This must
-        include all elements but not necessarily other node types
-        _flags - A list of miscellaneous flags that can be set on the node
+        """Creates a Node
+
+        :arg name: The tag name associated with the node
+
         """
+        # The tag name associated with the node
         self.name = name
+        # The parent of the current node (or None for the document node)
         self.parent = None
+        # The value of the current node (applies to text nodes and comments)
         self.value = None
+        # A dict holding name -> value pairs for attributes of the node
         self.attributes = {}
+        # A list of child nodes of the current node. This must include all
+        # elements but not necessarily other node types.
         self.childNodes = []
+        # A list of miscellaneous flags that can be set on the node.
         self._flags = []
 
     def __str__(self):
@@ -53,23 +56,41 @@
 
     def appendChild(self, node):
         """Insert node as a child of the current node
+
+        :arg node: the node to insert
+
         """
         raise NotImplementedError
 
     def insertText(self, data, insertBefore=None):
         """Insert data as text in the current node, positioned before the
         start of node insertBefore or to the end of the node's text.
+
+        :arg data: the data to insert
+
+        :arg insertBefore: the node to insert the text before; if ``None``,
+            the text is added to the end of the node's text
+
         """
         raise NotImplementedError
 
     def insertBefore(self, node, refNode):
         """Insert node as a child of the current node, before refNode in the
         list of child nodes. Raises ValueError if refNode is not a child of
-        the current node"""
+        the current node
+
+        :arg node: the node to insert
+
+        :arg refNode: the child node to insert the node before
+
+        """
         raise NotImplementedError
 
     def removeChild(self, node):
         """Remove node from the children of the current node
+
+        :arg node: the child node to remove
+
         """
         raise NotImplementedError
 
@@ -77,6 +98,9 @@
         """Move all the children of the current node to newParent.
         This is needed so that trees that don't store text as nodes move the
         text in the correct way
+
+        :arg newParent: the node to move all this node's children to
+
         """
         # XXX - should this method be made more general?
         for child in self.childNodes:
@@ -121,10 +145,12 @@
 
 class TreeBuilder(object):
     """Base treebuilder implementation
-    documentClass - the class to use for the bottommost node of a document
-    elementClass - the class to use for HTML Elements
-    commentClass - the class to use for comments
-    doctypeClass - the class to use for doctypes
+
+    * documentClass - the class to use for the bottommost node of a document
+    * elementClass - the class to use for HTML Elements
+    * commentClass - the class to use for comments
+    * doctypeClass - the class to use for doctypes
+
     """
     # pylint:disable=not-callable
 
@@ -144,6 +170,11 @@
     fragmentClass = None
 
     def __init__(self, namespaceHTMLElements):
+        """Create a TreeBuilder
+
+        :arg namespaceHTMLElements: whether or not to namespace HTML elements
+
+        """
         if namespaceHTMLElements:
             self.defaultNamespace = "http://www.w3.org/1999/xhtml"
         else:
@@ -367,11 +398,11 @@
             self.generateImpliedEndTags(exclude)
 
     def getDocument(self):
-        "Return the final tree"
+        """Return the final tree"""
         return self.document
 
     def getFragment(self):
-        "Return the final fragment"
+        """Return the final fragment"""
         # assert self.innerHTML
         fragment = self.fragmentClass()
         self.openElements[0].reparentChildren(fragment)
@@ -379,5 +410,8 @@
 
     def testSerializer(self, node):
         """Serialize the subtree of node in the format required by unit tests
-        node - the node from which to start serializing"""
+
+        :arg node: the node from which to start serializing
+
+        """
         raise NotImplementedError
diff -Nru html5lib-0.999999999/html5lib/treebuilders/etree_lxml.py html5lib-1.0.1/html5lib/treebuilders/etree_lxml.py
--- html5lib-0.999999999/html5lib/treebuilders/etree_lxml.py	2016-07-14 19:07:32.000000000 +0000
+++ html5lib-1.0.1/html5lib/treebuilders/etree_lxml.py	2017-12-07 12:25:26.000000000 +0000
@@ -309,7 +309,6 @@
         super(TreeBuilder, self).insertComment(data, parent)
 
     def insertRoot(self, token):
-        """Create the document root"""
         # Because of the way libxml2 works, it doesn't seem to be possible to
         # alter information like the doctype after the tree has been parsed.
         # Therefore we need to use the built-in parser to create our initial
diff -Nru html5lib-0.999999999/html5lib/treebuilders/__init__.py html5lib-1.0.1/html5lib/treebuilders/__init__.py
--- html5lib-0.999999999/html5lib/treebuilders/__init__.py	2016-07-14 19:07:32.000000000 +0000
+++ html5lib-1.0.1/html5lib/treebuilders/__init__.py	2017-12-07 12:25:26.000000000 +0000
@@ -1,29 +1,32 @@
-"""A collection of modules for building different kinds of tree from
-HTML documents.
+"""A collection of modules for building different kinds of trees from HTML
+documents.
 
 To create a treebuilder for a new type of tree, you need to do
 implement several things:
 
-1) A set of classes for various types of elements: Document, Doctype,
-Comment, Element. These must implement the interface of
-_base.treebuilders.Node (although comment nodes have a different
-signature for their constructor, see treebuilders.etree.Comment)
-Textual content may also be implemented as another node type, or not, as
-your tree implementation requires.
-
-2) A treebuilder object (called TreeBuilder by convention) that
-inherits from treebuilders._base.TreeBuilder. This has 4 required attributes:
-documentClass - the class to use for the bottommost node of a document
-elementClass - the class to use for HTML Elements
-commentClass - the class to use for comments
-doctypeClass - the class to use for doctypes
-It also has one required method:
-getDocument - Returns the root node of the complete document tree
-
-3) If you wish to run the unit tests, you must also create a
-testSerializer method on your treebuilder which accepts a node and
-returns a string containing Node and its children serialized according
-to the format used in the unittests
+1. A set of classes for various types of elements: Document, Doctype, Comment,
+   Element. These must implement the interface of ``treebuilders.base.Node``
+   (although comment nodes have a different signature for their constructor,
+   see ``treebuilders.etree.Comment``). Textual content may also be implemented
+   as another node type, or not, as your tree implementation requires.
+
+2. A treebuilder object (called ``TreeBuilder`` by convention) that inherits
+   from ``treebuilders.base.TreeBuilder``. This has 4 required attributes:
+
+   * ``documentClass`` - the class to use for the bottommost node of a document
+   * ``elementClass`` - the class to use for HTML Elements
+   * ``commentClass`` - the class to use for comments
+   * ``doctypeClass`` - the class to use for doctypes
+
+   It also has one required method:
+
+   * ``getDocument`` - Returns the root node of the complete document tree
+
+3. If you wish to run the unit tests, you must also create a ``testSerializer``
+   method on your treebuilder which accepts a node and returns a string
+   containing Node and its children serialized according to the format used in
+   the unittests
+
 """
 
 from __future__ import absolute_import, division, unicode_literals
@@ -34,23 +37,32 @@
 
 
 def getTreeBuilder(treeType, implementation=None, **kwargs):
-    """Get a TreeBuilder class for various types of tree with built-in support
+    """Get a TreeBuilder class for various types of trees with built-in support
+
+    :arg treeType: the name of the tree type required (case-insensitive). Supported
+        values are:
+
+        * "dom" - A generic builder for DOM implementations, defaulting to a
+          xml.dom.minidom based implementation.
+        * "etree" - A generic builder for tree implementations exposing an
+          ElementTree-like interface, defaulting to xml.etree.cElementTree if
+          available and xml.etree.ElementTree if not.
+        * "lxml" - An etree-based builder for lxml.etree, handling limitations
+          of lxml's implementation.
+
+    :arg implementation: (Currently applies to the "etree" and "dom" tree
+        types). A module implementing the tree type e.g. xml.etree.ElementTree
+        or xml.etree.cElementTree.
+
+    :arg kwargs: Any additional options to pass to the TreeBuilder when
+        creating it.
+
+    Example:
 
-    treeType - the name of the tree type required (case-insensitive). Supported
-               values are:
+    >>> from html5lib.treebuilders import getTreeBuilder
+    >>> builder = getTreeBuilder('etree')
 
-               "dom" - A generic builder for DOM implementations, defaulting to
-                       a xml.dom.minidom based implementation.
-               "etree" - A generic builder for tree implementations exposing an
-                         ElementTree-like interface, defaulting to
-                         xml.etree.cElementTree if available and
-                         xml.etree.ElementTree if not.
-               "lxml" - A etree-based builder for lxml.etree, handling
-                        limitations of lxml's implementation.
-
-    implementation - (Currently applies to the "etree" and "dom" tree types). A
-                      module implementing the tree type e.g.
-                      xml.etree.ElementTree or xml.etree.cElementTree."""
+    """
 
     treeType = treeType.lower()
     if treeType not in treeBuilderCache:
diff -Nru html5lib-0.999999999/html5lib/treewalkers/base.py html5lib-1.0.1/html5lib/treewalkers/base.py
--- html5lib-0.999999999/html5lib/treewalkers/base.py	2016-07-14 19:07:32.000000000 +0000
+++ html5lib-1.0.1/html5lib/treewalkers/base.py	2017-12-07 12:25:26.000000000 +0000
@@ -18,16 +18,48 @@
 
 
 class TreeWalker(object):
+    """Walks a tree yielding tokens
+
+    Tokens are dicts that all have a ``type`` field specifying the type of the
+    token.
+
+    """
     def __init__(self, tree):
+        """Creates a TreeWalker
+
+        :arg tree: the tree to walk
+
+        """
         self.tree = tree
 
     def __iter__(self):
         raise NotImplementedError
 
     def error(self, msg):
+        """Generates an error token with the given message
+
+        :arg msg: the error message
+
+        :returns: SerializeError token
+
+        """
         return {"type": "SerializeError", "data": msg}
 
     def emptyTag(self, namespace, name, attrs, hasChildren=False):
+        """Generates an EmptyTag token
+
+        :arg namespace: the namespace of the token--can be ``None``
+
+        :arg name: the name of the element
+
+        :arg attrs: the attributes of the element as a dict
+
+        :arg hasChildren: whether or not to yield a SerializeError token
+            because this tag shouldn't have children
+
+        :returns: EmptyTag token
+
+        """
         yield {"type": "EmptyTag", "name": name,
                "namespace": namespace,
                "data": attrs}
@@ -35,17 +67,61 @@
             yield self.error("Void element has children")
 
     def startTag(self, namespace, name, attrs):
+        """Generates a StartTag token
+
+        :arg namespace: the namespace of the token--can be ``None``
+
+        :arg name: the name of the element
+
+        :arg attrs: the attributes of the element as a dict
+
+        :returns: StartTag token
+
+        """
         return {"type": "StartTag",
                 "name": name,
                 "namespace": namespace,
                 "data": attrs}
 
     def endTag(self, namespace, name):
+        """Generates an EndTag token
+
+        :arg namespace: the namespace of the token--can be ``None``
+
+        :arg name: the name of the element
+
+        :returns: EndTag token
+
+        """
         return {"type": "EndTag",
                 "name": name,
                 "namespace": namespace}
 
     def text(self, data):
+        """Generates SpaceCharacters and Characters tokens
+
+        Depending on what's in the data, this generates one or more
+        ``SpaceCharacters`` and ``Characters`` tokens.
+
+        For example:
+
+            >>> from html5lib.treewalkers.base import TreeWalker
+            >>> # Give it an empty tree just so it instantiates
+            >>> walker = TreeWalker([])
+            >>> list(walker.text(''))
+            []
+            >>> list(walker.text('  '))
+            [{u'data': '  ', u'type': u'SpaceCharacters'}]
+            >>> list(walker.text(' abc '))  # doctest: +NORMALIZE_WHITESPACE
+            [{u'data': ' ', u'type': u'SpaceCharacters'},
+            {u'data': u'abc', u'type': u'Characters'},
+            {u'data': u' ', u'type': u'SpaceCharacters'}]
+
+        :arg data: the text data
+
+        :returns: one or more ``SpaceCharacters`` and ``Characters`` tokens
+
+        """
         data = data
         middle = data.lstrip(spaceCharacters)
         left = data[:len(data) - len(middle)]
@@ -60,18 +136,44 @@
             yield {"type": "SpaceCharacters", "data": right}
 
     def comment(self, data):
+        """Generates a Comment token
+
+        :arg data: the comment
+
+        :returns: Comment token
+
+        """
         return {"type": "Comment", "data": data}
 
     def doctype(self, name, publicId=None, systemId=None):
+        """Generates a Doctype token
+
+        :arg name:
+
+        :arg publicId:
+
+        :arg systemId:
+
+        :returns: the Doctype token
+
+        """
         return {"type": "Doctype",
                 "name": name,
                 "publicId": publicId,
                 "systemId": systemId}
 
     def entity(self, name):
+        """Generates an Entity token
+
+        :arg name: the entity name
+
+        :returns: an Entity token
+
+        """
         return {"type": "Entity", "name": name}
 
     def unknown(self, nodeType):
+        """Handles unknown node types"""
         return self.error("Unknown node type: " + nodeType)
 
 
diff -Nru html5lib-0.999999999/html5lib/treewalkers/etree.py html5lib-1.0.1/html5lib/treewalkers/etree.py
--- html5lib-0.999999999/html5lib/treewalkers/etree.py	2016-07-14 19:07:32.000000000 +0000
+++ html5lib-1.0.1/html5lib/treewalkers/etree.py	2017-12-07 12:25:26.000000000 +0000
@@ -1,13 +1,6 @@
 from __future__ import absolute_import, division, unicode_literals
 
-try:
-    from collections import OrderedDict
-except ImportError:
-    try:
-        from ordereddict import OrderedDict
-    except ImportError:
-        OrderedDict = dict
-
+from collections import OrderedDict
 import re
 
 from six import string_types
diff -Nru html5lib-0.999999999/html5lib/treewalkers/__init__.py html5lib-1.0.1/html5lib/treewalkers/__init__.py
--- html5lib-0.999999999/html5lib/treewalkers/__init__.py	2016-07-14 19:07:32.000000000 +0000
+++ html5lib-1.0.1/html5lib/treewalkers/__init__.py	2017-12-07 12:25:26.000000000 +0000
@@ -13,7 +13,7 @@
 from .. import constants
 from .._utils import default_etree
 
-__all__ = ["getTreeWalker", "pprint", "dom", "etree", "genshi", "etree_lxml"]
+__all__ = ["getTreeWalker", "pprint"]
 
 treeWalkerCache = {}
 
@@ -21,20 +21,25 @@
 def getTreeWalker(treeType, implementation=None, **kwargs):
     """Get a TreeWalker class for various types of tree with built-in support
 
-    Args:
-        treeType (str): the name of the tree type required (case-insensitive).
-            Supported values are:
-
-            - "dom": The xml.dom.minidom DOM implementation
-            - "etree": A generic walker for tree implementations exposing an
-                       elementtree-like interface (known to work with
-                       ElementTree, cElementTree and lxml.etree).
-            - "lxml": Optimized walker for lxml.etree
-            - "genshi": a Genshi stream
-
-        Implementation: A module implementing the tree type e.g.
-            xml.etree.ElementTree or cElementTree (Currently applies to the
-            "etree" tree type only).
+    :arg str treeType: the name of the tree type required (case-insensitive).
+        Supported values are:
+
+        * "dom": The xml.dom.minidom DOM implementation
+        * "etree": A generic walker for tree implementations exposing an
+          elementtree-like interface (known to work with ElementTree,
+          cElementTree and lxml.etree).
+        * "lxml": Optimized walker for lxml.etree
+        * "genshi": a Genshi stream
+
+    :arg implementation: A module implementing the tree type e.g.
+        xml.etree.ElementTree or cElementTree (Currently applies to the "etree"
+        tree type only).
+
+    :arg kwargs: keyword arguments passed to the etree walker--for other
+        walkers, this has no effect
+
+    :returns: a TreeWalker class
+
     """
 
     treeType = treeType.lower()
@@ -73,7 +78,13 @@
 
 
 def pprint(walker):
-    """Pretty printer for tree walkers"""
+    """Pretty printer for tree walkers
+
+    Takes a TreeWalker instance and pretty prints the output of walking the tree.
+
+    :arg walker: a TreeWalker instance
+
+    """
     output = []
     indent = 0
     for token in concatenateCharacterTokens(walker):
diff -Nru html5lib-0.999999999/html5lib/_trie/_base.py html5lib-1.0.1/html5lib/_trie/_base.py
--- html5lib-0.999999999/html5lib/_trie/_base.py	2016-07-14 19:07:32.000000000 +0000
+++ html5lib-1.0.1/html5lib/_trie/_base.py	2017-12-07 12:25:26.000000000 +0000
@@ -13,8 +13,7 @@
         if prefix is None:
             return set(keys)
 
-        # Python 2.6: no set comprehensions
-        return set([x for x in keys if x.startswith(prefix)])
+        return {x for x in keys if x.startswith(prefix)}
 
     def has_keys_with_prefix(self, prefix):
         for key in self.keys():
diff -Nru html5lib-0.999999999/html5lib/_utils.py html5lib-1.0.1/html5lib/_utils.py
--- html5lib-0.999999999/html5lib/_utils.py	2016-07-14 19:07:32.000000000 +0000
+++ html5lib-1.0.1/html5lib/_utils.py	2017-12-07 12:25:26.000000000 +0000
@@ -1,6 +1,5 @@
 from __future__ import absolute_import, division, unicode_literals
 
-import sys
 from types import ModuleType
 
 from six import text_type
@@ -13,11 +12,9 @@
 
 __all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair",
            "surrogatePairToCodepoint", "moduleFactoryFactory",
-           "supports_lone_surrogates", "PY27"]
+           "supports_lone_surrogates"]
 
 
-PY27 = sys.version_info[0] == 2 and sys.version_info[1] >= 7
-
 # Platforms not supporting lone surrogates (\uD800-\uDFFF) should be
 # caught by the below test. In general this would be any platform
 # using UTF-16 as its encoding of unicode strings, such as
diff -Nru html5lib-0.999999999/html5lib.egg-info/PKG-INFO html5lib-1.0.1/html5lib.egg-info/PKG-INFO
--- html5lib-0.999999999/html5lib.egg-info/PKG-INFO	2016-07-15 01:37:50.000000000 +0000
+++ html5lib-1.0.1/html5lib.egg-info/PKG-INFO	2017-12-07 14:09:53.000000000 +0000
@@ -1,11 +1,12 @@
 Metadata-Version: 1.1
 Name: html5lib
-Version: 0.999999999
+Version: 1.0.1
 Summary: HTML parser based on the WHATWG HTML specification
 Home-page: https://github.com/html5lib/html5lib-python
 Author: James Graham
 Author-email: james@hoppipolla.co.uk
 License: MIT License
+Description-Content-Type: UNKNOWN
 Description: html5lib
         ========
         
@@ -98,7 +99,7 @@
         Installation
         ------------
         
-        html5lib works on CPython 2.6+, CPython 3.3+ and PyPy.  To install it,
+        html5lib works on CPython 2.7+, CPython 3.3+ and PyPy.  To install it,
         use:
         
         .. code-block:: bash
@@ -136,8 +137,7 @@
         -----
         
         Unit tests require the ``pytest`` and ``mock`` libraries and can be
-        run using the ``py.test`` command in the root directory;
-        ``ordereddict`` is required under Python 2.6. All should pass.
+        run using the ``py.test`` command in the root directory.
         
         Test data are contained in a separate `html5lib-tests
         <https://github.com/html5lib/html5lib-tests>`_ repository and included
@@ -162,6 +162,50 @@
         Change Log
         ----------
         
+        1.0.1
+        ~~~~~
+        
+        Released on December 7, 2017
+        
+        Breaking changes:
+        
+        * Drop support for Python 2.6. (#330) (Thank you, Hugo, Will Kahn-Greene!)
+        * Remove ``utils/spider.py`` (#353) (Thank you, Jon Dufresne!)
+        
+        Features:
+        
+        * Improve documentation. (#300, #307) (Thank you, Jon Dufresne, Tom Most,
+          Will Kahn-Greene!)
+        * Add iframe seamless boolean attribute. (Thank you, Ritwik Gupta!)
+        * Add itemscope as a boolean attribute. (#194) (Thank you, Jonathan Vanasco!)
+        * Support Python 3.6. (#333) (Thank you, Jon Dufresne!)
+        * Add CI support for Windows using AppVeyor. (Thank you, John Vandenberg!)
+        * Improve testing and CI and add code coverage (#323, #334) (Thank you, Jon
+          Dufresne, John Vandenberg, Geoffrey Sneddon, Will Kahn-Greene!)
+        * Semver-compliant version number.
+        
+        Bug fixes:
+        
+        * Add support for setuptools < 18.5 to support environment markers. (Thank you,
+          John Vandenberg!)
+        * Add explicit dependency for six >= 1.9. (Thank you, Eric Amorde!)
+        * Fix regexes to work with Python 3.7 regex adjustments. (#318, #379) (Thank
+          you, Benedikt Morbach, Ville Skyttä, Mark Vasilkov!)
+        * Fix alphabeticalattributes filter namespace bug. (#324) (Thank you, Will
+          Kahn-Greene!)
+        * Include license file in generated wheel package. (#350) (Thank you, Jon
+          Dufresne!)
+        * Fix annotation-xml typo. (#339) (Thank you, Will Kahn-Greene!)
+        * Allow uppercase hex characters in CSS colour check. (#377) (Thank you,
+          Komal Dembla, Hugo!)
+        
+        
+        1.0
+        ~~~
+        
+        Released and unreleased on December 7, 2017. Badly packaged release.
+        
+        
         0.999999999/1.0b10
         ~~~~~~~~~~~~~~~~~~
         
@@ -186,7 +230,7 @@
         
         * Cease supporting DATrie under PyPy.
         
-        * **Remove ``PullDOM`` support, as this hasn't ever been properly
+        * **Remove PullDOM support, as this hasn't ever been properly
           tested, doesn't entirely work, and as far as I can tell is
           completely unused by anyone.**
         
@@ -224,7 +268,7 @@
           to clarify their status as public.**
         
         * **Get rid of the sanitizer package. Merge sanitizer.sanitize into the
-          sanitizer.htmlsanitizer module and move that to saniziter. This means
+          sanitizer.htmlsanitizer module and move that to sanitizer. This means
           anyone who used sanitizer.sanitize or sanitizer.HTMLSanitizer needs no
           code changes.**
         
@@ -458,11 +502,11 @@
 Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python
 Classifier: Programming Language :: Python :: 2
-Classifier: Programming Language :: Python :: 2.6
 Classifier: Programming Language :: Python :: 2.7
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.3
 Classifier: Programming Language :: Python :: 3.4
 Classifier: Programming Language :: Python :: 3.5
+Classifier: Programming Language :: Python :: 3.6
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Classifier: Topic :: Text Processing :: Markup :: HTML
diff -Nru html5lib-0.999999999/html5lib.egg-info/requires.txt html5lib-1.0.1/html5lib.egg-info/requires.txt
--- html5lib-0.999999999/html5lib.egg-info/requires.txt	2016-07-15 01:37:50.000000000 +0000
+++ html5lib-1.0.1/html5lib.egg-info/requires.txt	2017-12-07 14:09:53.000000000 +0000
@@ -1,9 +1,5 @@
-six
+six>=1.9
 webencodings
-setuptools>=18.5
-
-[:python_version == '2.6']
-ordereddict
 
 [all]
 genshi
diff -Nru html5lib-0.999999999/html5lib.egg-info/SOURCES.txt html5lib-1.0.1/html5lib.egg-info/SOURCES.txt
--- html5lib-0.999999999/html5lib.egg-info/SOURCES.txt	2016-07-15 01:37:52.000000000 +0000
+++ html5lib-1.0.1/html5lib.egg-info/SOURCES.txt	2017-12-07 14:09:53.000000000 +0000
@@ -40,6 +40,7 @@
 html5lib/tests/conftest.py
 html5lib/tests/sanitizer.py
 html5lib/tests/support.py
+html5lib/tests/test_alphabeticalattributes.py
 html5lib/tests/test_encoding.py
 html5lib/tests/test_meta.py
 html5lib/tests/test_optionaltags_filter.py
diff -Nru html5lib-0.999999999/PKG-INFO html5lib-1.0.1/PKG-INFO
--- html5lib-0.999999999/PKG-INFO	2016-07-15 01:37:52.000000000 +0000
+++ html5lib-1.0.1/PKG-INFO	2017-12-07 14:09:53.000000000 +0000
@@ -1,11 +1,12 @@
 Metadata-Version: 1.1
 Name: html5lib
-Version: 0.999999999
+Version: 1.0.1
 Summary: HTML parser based on the WHATWG HTML specification
 Home-page: https://github.com/html5lib/html5lib-python
 Author: James Graham
 Author-email: james@hoppipolla.co.uk
 License: MIT License
+Description-Content-Type: UNKNOWN
 Description: html5lib
         ========
         
@@ -98,7 +99,7 @@
         Installation
         ------------
         
-        html5lib works on CPython 2.6+, CPython 3.3+ and PyPy.  To install it,
+        html5lib works on CPython 2.7+, CPython 3.3+ and PyPy.  To install it,
         use:
         
         .. code-block:: bash
@@ -136,8 +137,7 @@
         -----
         
         Unit tests require the ``pytest`` and ``mock`` libraries and can be
-        run using the ``py.test`` command in the root directory;
-        ``ordereddict`` is required under Python 2.6. All should pass.
+        run using the ``py.test`` command in the root directory.
         
         Test data are contained in a separate `html5lib-tests
         <https://github.com/html5lib/html5lib-tests>`_ repository and included
@@ -162,6 +162,50 @@
         Change Log
         ----------
         
+        1.0.1
+        ~~~~~
+        
+        Released on December 7, 2017
+        
+        Breaking changes:
+        
+        * Drop support for Python 2.6. (#330) (Thank you, Hugo, Will Kahn-Greene!)
+        * Remove ``utils/spider.py`` (#353) (Thank you, Jon Dufresne!)
+        
+        Features:
+        
+        * Improve documentation. (#300, #307) (Thank you, Jon Dufresne, Tom Most,
+          Will Kahn-Greene!)
+        * Add iframe seamless boolean attribute. (Thank you, Ritwik Gupta!)
+        * Add itemscope as a boolean attribute. (#194) (Thank you, Jonathan Vanasco!)
+        * Support Python 3.6. (#333) (Thank you, Jon Dufresne!)
+        * Add CI support for Windows using AppVeyor. (Thank you, John Vandenberg!)
+        * Improve testing and CI and add code coverage (#323, #334) (Thank you, Jon
+          Dufresne, John Vandenberg, Geoffrey Sneddon, Will Kahn-Greene!)
+        * Semver-compliant version number.
+        
+        Bug fixes:
+        
+        * Add support for setuptools < 18.5 to support environment markers. (Thank you,
+          John Vandenberg!)
+        * Add explicit dependency for six >= 1.9. (Thank you, Eric Amorde!)
+        * Fix regexes to work with Python 3.7 regex adjustments. (#318, #379) (Thank
+          you, Benedikt Morbach, Ville Skyttä, Mark Vasilkov!)
+        * Fix alphabeticalattributes filter namespace bug. (#324) (Thank you, Will
+          Kahn-Greene!)
+        * Include license file in generated wheel package. (#350) (Thank you, Jon
+          Dufresne!)
+        * Fix annotation-xml typo. (#339) (Thank you, Will Kahn-Greene!)
+        * Allow uppercase hex characters in CSS colour check. (#377) (Thank you,
+          Komal Dembla, Hugo!)
+        
+        
+        1.0
+        ~~~
+        
+        Released and unreleased on December 7, 2017. Badly packaged release.
+        
+        
         0.999999999/1.0b10
         ~~~~~~~~~~~~~~~~~~
         
@@ -186,7 +230,7 @@
         
         * Cease supporting DATrie under PyPy.
         
-        * **Remove ``PullDOM`` support, as this hasn't ever been properly
+        * **Remove PullDOM support, as this hasn't ever been properly
           tested, doesn't entirely work, and as far as I can tell is
           completely unused by anyone.**
         
@@ -224,7 +268,7 @@
           to clarify their status as public.**
         
         * **Get rid of the sanitizer package. Merge sanitizer.sanitize into the
-          sanitizer.htmlsanitizer module and move that to saniziter. This means
+          sanitizer.htmlsanitizer module and move that to sanitizer. This means
           anyone who used sanitizer.sanitize or sanitizer.HTMLSanitizer needs no
           code changes.**
         
@@ -458,11 +502,11 @@
 Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python
 Classifier: Programming Language :: Python :: 2
-Classifier: Programming Language :: Python :: 2.6
 Classifier: Programming Language :: Python :: 2.7
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.3
 Classifier: Programming Language :: Python :: 3.4
 Classifier: Programming Language :: Python :: 3.5
+Classifier: Programming Language :: Python :: 3.6
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Classifier: Topic :: Text Processing :: Markup :: HTML
diff -Nru html5lib-0.999999999/README.rst html5lib-1.0.1/README.rst
--- html5lib-0.999999999/README.rst	2016-07-12 15:04:04.000000000 +0000
+++ html5lib-1.0.1/README.rst	2017-12-07 12:25:26.000000000 +0000
@@ -90,7 +90,7 @@
 Installation
 ------------
 
-html5lib works on CPython 2.6+, CPython 3.3+ and PyPy.  To install it,
+html5lib works on CPython 2.7+, CPython 3.3+ and PyPy.  To install it,
 use:
 
 .. code-block:: bash
@@ -128,8 +128,7 @@
 -----
 
 Unit tests require the ``pytest`` and ``mock`` libraries and can be
-run using the ``py.test`` command in the root directory;
-``ordereddict`` is required under Python 2.6. All should pass.
+run using the ``py.test`` command in the root directory.
 
 Test data are contained in a separate `html5lib-tests
 <https://github.com/html5lib/html5lib-tests>`_ repository and included
diff -Nru html5lib-0.999999999/requirements-test.txt html5lib-1.0.1/requirements-test.txt
--- html5lib-0.999999999/requirements-test.txt	2016-05-20 15:46:08.000000000 +0000
+++ html5lib-1.0.1/requirements-test.txt	2017-12-07 12:25:26.000000000 +0000
@@ -1,7 +1,10 @@
 -r requirements.txt
 
-flake8
-pytest
+tox
+
+flake8<3.0
+
+pytest==3.2.5
+coverage
 pytest-expect>=1.1,<2.0
 mock
-ordereddict ; python_version < '2.7'
diff -Nru html5lib-0.999999999/requirements.txt html5lib-1.0.1/requirements.txt
--- html5lib-0.999999999/requirements.txt	2016-07-10 23:36:48.000000000 +0000
+++ html5lib-1.0.1/requirements.txt	2017-12-07 12:25:26.000000000 +0000
@@ -1,4 +1,2 @@
-six
+six>=1.9
 webencodings
-ordereddict ; python_version < '2.7'
-setuptools>=18.5
diff -Nru html5lib-0.999999999/setup.cfg html5lib-1.0.1/setup.cfg
--- html5lib-0.999999999/setup.cfg	2016-07-15 01:37:52.000000000 +0000
+++ html5lib-1.0.1/setup.cfg	2017-12-07 14:09:53.000000000 +0000
@@ -10,8 +10,10 @@
 ignore = N
 max-line-length = 139
 
+[metadata]
+license_file = LICENSE
+
 [egg_info]
 tag_build = 
 tag_date = 0
-tag_svn_revision = 0
 
diff -Nru html5lib-0.999999999/setup.py html5lib-1.0.1/setup.py
--- html5lib-0.999999999/setup.py	2016-07-10 23:36:48.000000000 +0000
+++ html5lib-1.0.1/setup.py	2017-12-07 12:25:26.000000000 +0000
@@ -8,10 +8,54 @@
 from setuptools import setup, find_packages, __version__ as setuptools_version
 from pkg_resources import parse_version
 
-if parse_version(setuptools_version) < parse_version("18.5"):
-    print("html5lib requires setuptools version 18.5 or above; "
-          "please upgrade before installing (you have %s)" % setuptools_version)
-    sys.exit(1)
+import pkg_resources
+
+try:
+    import _markerlib.markers
+except ImportError:
+    _markerlib = None
+
+
+# _markerlib.default_environment() obtains its data from _VARS
+# and wraps it in another dict, but _markerlib_evaluate writes
+# to the dict while it is iterating the keys, causing an error
+# on Python 3 only.
+# Replace _markerlib.default_environment to return a custom dict
+# that has all the necessary markers, and ignores any writes.
+
+class Python3MarkerDict(dict):
+
+    def __setitem__(self, key, value):
+        pass
+
+    def pop(self, i=-1):
+        return self[i]
+
+
+if _markerlib and sys.version_info[0] == 3:
+    env = _markerlib.markers._VARS
+    for key in list(env.keys()):
+        new_key = key.replace('.', '_')
+        if new_key != key:
+            env[new_key] = env[key]
+
+    _markerlib.markers._VARS = Python3MarkerDict(env)
+
+    def default_environment():
+        return _markerlib.markers._VARS
+
+    _markerlib.default_environment = default_environment
+
+# Avoid the very buggy pkg_resources.parser, which doesn't consistently
+# recognise the markers needed by this setup.py
+# Change this to setuptools 20.10.0 to support all markers.
+if pkg_resources:
+    if parse_version(setuptools_version) < parse_version('18.5'):
+        MarkerEvaluation = pkg_resources.MarkerEvaluation
+
+        del pkg_resources.parser
+        pkg_resources.evaluate_marker = MarkerEvaluation._markerlib_evaluate
+        MarkerEvaluation.evaluate_marker = MarkerEvaluation._markerlib_evaluate
 
 classifiers = [
     'Development Status :: 5 - Production/Stable',
@@ -20,12 +64,12 @@
     'Operating System :: OS Independent',
     'Programming Language :: Python',
     'Programming Language :: Python :: 2',
-    'Programming Language :: Python :: 2.6',
     'Programming Language :: Python :: 2.7',
     'Programming Language :: Python :: 3',
     'Programming Language :: Python :: 3.3',
     'Programming Language :: Python :: 3.4',
     'Programming Language :: Python :: 3.5',
+    'Programming Language :: Python :: 3.6',
     'Topic :: Software Development :: Libraries :: Python Modules',
     'Topic :: Text Processing :: Markup :: HTML'
 ]
@@ -58,15 +102,10 @@
       maintainer_email='james@hoppipolla.co.uk',
       packages=find_packages(exclude=["*.tests", "*.tests.*", "tests.*", "tests"]),
       install_requires=[
-          'six',
+          'six>=1.9',
           'webencodings',
-          'setuptools>=18.5'
       ],
       extras_require={
-          # A empty extra that only has a conditional marker will be
-          # unconditonally installed when the condition matches.
-          ":python_version == '2.6'": ["ordereddict"],
-
           # A conditional extra will only install these items when the extra is
           # requested and the condition matches.
           "datrie:platform_python_implementation == 'CPython'": ["datrie"],
diff -Nru html5lib-0.999999999/tox.ini html5lib-1.0.1/tox.ini
--- html5lib-0.999999999/tox.ini	2016-05-20 20:08:10.000000000 +0000
+++ html5lib-1.0.1/tox.ini	2017-12-07 12:25:26.000000000 +0000
@@ -1,17 +1,23 @@
 [tox]
-envlist = {py26,py27,py33,py34,py35,pypy}-{base,optional}
+envlist = {py27,py33,py34,py35,py36,pypy}-{base,six19,optional}
 
 [testenv]
 deps =
-  flake8
-  pytest
-  pytest-expect>=1.1,<2.0
-  mock
-  base: six
-  base: webencodings
-  py26-base: ordereddict
   optional: -r{toxinidir}/requirements-optional.txt
+  -r{toxinidir}/requirements-test.txt
+  doc: Sphinx
 
+passenv =
+  PYTEST_COMMAND
+  COVERAGE_RUN_OPTIONS
 commands =
-  {envbindir}/py.test
-  {toxinidir}/flake8-run.sh
+  six19: pip install six==1.9
+  {env:PYTEST_COMMAND:{envbindir}/py.test} {posargs}
+  flake8 {toxinidir}
+
+[testenv:doc]
+changedir = doc
+commands = sphinx-build -b html . _build
+
+[flake8]
+exclude = ./.tox