diff -Nru beautifulsoup4-4.9.3/beautifulsoup4.egg-info/PKG-INFO beautifulsoup4-4.10.0/beautifulsoup4.egg-info/PKG-INFO
--- beautifulsoup4-4.9.3/beautifulsoup4.egg-info/PKG-INFO	2020-10-03 15:34:15.000000000 +0000
+++ beautifulsoup4-4.10.0/beautifulsoup4.egg-info/PKG-INFO	2021-09-08 00:13:24.000000000 +0000
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: beautifulsoup4
-Version: 4.9.3
+Version: 4.10.0
 Summary: Screen-scraping library
 Home-page: http://www.crummy.com/software/BeautifulSoup/bs4/
 Author: Leonard Richardson
@@ -62,17 +62,11 @@
         
         # Note on Python 2 sunsetting
         
-        Since 2012, Beautiful Soup has been developed as a Python 2 library
-        which is automatically converted to Python 3 code as necessary. This
-        makes it impossible to take advantage of some features of Python
-        3.
-        
-        For this reason, I plan to discontinue Beautiful Soup's Python 2
-        support at some point after December 31, 2020: one year after the
-        sunset date for Python 2 itself. Beyond that point, new Beautiful Soup
-        development will exclusively target Python 3. Of course, older
-        releases of Beautiful Soup, which support both versions, will continue
-        to be available.
+        Beautiful Soup's support for Python 2 was discontinued on December 31,
+        2020: one year after the sunset date for Python 2 itself. From this
+        point onward, new Beautiful Soup development will exclusively target
+        Python 3. The final release of Beautiful Soup 4 to support Python 2
+        was 4.9.3.
         
         # Supporting the project
         
@@ -102,25 +96,20 @@
         ```
         
         ```
-        $ python -m unittest discover -s bs4
+        $ python3 -m unittest discover -s bs4
         ```
         
-        If you checked out the source tree, you should see a script in the
-        home directory called test-all-versions. This script will run the unit
-        tests under Python 2, then create a temporary Python 3 conversion of
-        the source and run the unit tests again under Python 3.
-        
 Platform: UNKNOWN
 Classifier: Development Status :: 5 - Production/Stable
 Classifier: Intended Audience :: Developers
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Programming Language :: Python
-Classifier: Programming Language :: Python :: 2.7
 Classifier: Programming Language :: Python :: 3
 Classifier: Topic :: Text Processing :: Markup :: HTML
 Classifier: Topic :: Text Processing :: Markup :: XML
 Classifier: Topic :: Text Processing :: Markup :: SGML
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Requires-Python: >3.0.0
 Description-Content-Type: text/markdown
 Provides-Extra: html5lib
 Provides-Extra: lxml
diff -Nru beautifulsoup4-4.9.3/beautifulsoup4.egg-info/requires.txt beautifulsoup4-4.10.0/beautifulsoup4.egg-info/requires.txt
--- beautifulsoup4-4.9.3/beautifulsoup4.egg-info/requires.txt	2020-10-03 15:34:15.000000000 +0000
+++ beautifulsoup4-4.10.0/beautifulsoup4.egg-info/requires.txt	2021-09-08 00:13:24.000000000 +0000
@@ -1,8 +1,3 @@
-
-[:python_version < "3.0"]
-soupsieve<2.0,>1.2
-
-[:python_version >= "3.0"]
 soupsieve>1.2
 
 [html5lib]
diff -Nru beautifulsoup4-4.9.3/beautifulsoup4.egg-info/SOURCES.txt beautifulsoup4-4.10.0/beautifulsoup4.egg-info/SOURCES.txt
--- beautifulsoup4-4.9.3/beautifulsoup4.egg-info/SOURCES.txt	2020-10-03 15:34:15.000000000 +0000
+++ beautifulsoup4-4.10.0/beautifulsoup4.egg-info/SOURCES.txt	2021-09-08 00:13:24.000000000 +0000
@@ -4,7 +4,7 @@
 NEWS.txt
 README.md
 TODO.txt
-convert-py3k
+parse.txt
 setup.cfg
 setup.py
 test-all-versions
diff -Nru beautifulsoup4-4.9.3/bs4/builder/_html5lib.py beautifulsoup4-4.10.0/bs4/builder/_html5lib.py
--- beautifulsoup4-4.9.3/bs4/builder/_html5lib.py	2020-09-26 14:36:10.000000000 +0000
+++ beautifulsoup4-4.10.0/bs4/builder/_html5lib.py	2021-09-07 23:36:46.000000000 +0000
@@ -33,7 +33,7 @@
     # Pre-0.99999999
     from html5lib.treebuilders import _base as treebuilder_base
     new_html5lib = False
-except ImportError, e:
+except ImportError as e:
     # 0.99999999 and up
     from html5lib.treebuilders import base as treebuilder_base
     new_html5lib = True
@@ -79,7 +79,7 @@
         parser = html5lib.HTMLParser(tree=self.create_treebuilder)
         self.underlying_builder.parser = parser
         extra_kwargs = dict()
-        if not isinstance(markup, unicode):
+        if not isinstance(markup, str):
             if new_html5lib:
                 extra_kwargs['override_encoding'] = self.user_specified_encoding
             else:
@@ -87,13 +87,13 @@
         doc = parser.parse(markup, **extra_kwargs)
         
         # Set the character encoding detected by the tokenizer.
-        if isinstance(markup, unicode):
+        if isinstance(markup, str):
             # We need to special-case this because html5lib sets
             # charEncoding to UTF-8 if it gets Unicode input.
             doc.original_encoding = None
         else:
             original_encoding = parser.tokenizer.stream.charEncoding[0]
-            if not isinstance(original_encoding, basestring):
+            if not isinstance(original_encoding, str):
                 # In 0.99999999 and up, the encoding is an html5lib
                 # Encoding object. We want to use a string for compatibility
                 # with other tree builders.
@@ -110,7 +110,7 @@
 
     def test_fragment_to_document(self, fragment):
         """See `TreeBuilder`."""
-        return u'<html><head></head><body>%s</body></html>' % fragment
+        return '<html><head></head><body>%s</body></html>' % fragment
 
 
 class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder):
@@ -217,7 +217,7 @@
                 rv.append("|%s<%s>" % (' ' * indent, name))
                 if element.attrs:
                     attributes = []
-                    for name, value in element.attrs.items():
+                    for name, value in list(element.attrs.items()):
                         if isinstance(name, NamespacedAttribute):
                             name = "%s %s" % (prefixes[name.namespace], name.name)
                         if isinstance(value, list):
@@ -272,7 +272,7 @@
 
     def appendChild(self, node):
         string_child = child = None
-        if isinstance(node, basestring):
+        if isinstance(node, str):
             # Some other piece of code decided to pass in a string
             # instead of creating a TextElement object to contain the
             # string.
@@ -289,7 +289,7 @@
             child = node.element
             node.parent = self
 
-        if not isinstance(child, basestring) and child.parent is not None:
+        if not isinstance(child, str) and child.parent is not None:
             node.element.extract()
 
         if (string_child is not None and self.element.contents
@@ -302,7 +302,7 @@
             old_element.replace_with(new_element)
             self.soup._most_recent_element = new_element
         else:
-            if isinstance(node, basestring):
+            if isinstance(node, str):
                 # Create a brand new NavigableString from this string.
                 child = self.soup.new_string(node)
 
@@ -340,7 +340,7 @@
 
             self.soup.builder._replace_cdata_list_attribute_values(
                 self.name, attributes)
-            for name, value in attributes.items():
+            for name, value in list(attributes.items()):
                 self.element[name] = value
 
             # The attributes may contain variables that need substitution.
diff -Nru beautifulsoup4-4.9.3/bs4/builder/_htmlparser.py beautifulsoup4-4.10.0/bs4/builder/_htmlparser.py
--- beautifulsoup4-4.9.3/bs4/builder/_htmlparser.py	2020-09-26 14:36:05.000000000 +0000
+++ beautifulsoup4-4.10.0/bs4/builder/_htmlparser.py	2021-09-07 23:36:46.000000000 +0000
@@ -8,11 +8,11 @@
     'HTMLParserTreeBuilder',
     ]
 
-from HTMLParser import HTMLParser
+from html.parser import HTMLParser
 
 try:
-    from HTMLParser import HTMLParseError
-except ImportError, e:
+    from html.parser import HTMLParseError
+except ImportError as e:
     # HTMLParseError is removed in Python 3.5. Since it can never be
     # thrown in 3.5, we can just define our own class as a placeholder.
     class HTMLParseError(Exception):
@@ -219,19 +219,19 @@
                     continue
                 try:
                     data = bytearray([real_name]).decode(encoding)
-                except UnicodeDecodeError, e:
+                except UnicodeDecodeError as e:
                     pass
         if not data:
             try:
-                data = unichr(real_name)
-            except (ValueError, OverflowError), e:
+                data = chr(real_name)
+            except (ValueError, OverflowError) as e:
                 pass
-        data = data or u"\N{REPLACEMENT CHARACTER}"
+        data = data or "\N{REPLACEMENT CHARACTER}"
         self.handle_data(data)
 
     def handle_entityref(self, name):
         """Handle a named entity reference by converting it to the
-        corresponding Unicode character and treating it as textual
+        corresponding Unicode character(s) and treating it as textual
         data.
 
         :param name: Name of the entity reference.
@@ -353,15 +353,30 @@
          document to Unicode and parsing it. Each strategy will be tried 
          in turn.
         """
-        if isinstance(markup, unicode):
+        if isinstance(markup, str):
             # Parse Unicode as-is.
             yield (markup, None, None, False)
             return
 
         # Ask UnicodeDammit to sniff the most likely encoding.
+
+        # This was provided by the end-user; treat it as a known
+        # definite encoding per the algorithm laid out in the HTML5
+        # spec.  (See the EncodingDetector class for details.)
+        known_definite_encodings = [user_specified_encoding]
+
+        # This was found in the document; treat it as a slightly lower-priority
+        # user encoding.
+        user_encodings = [document_declared_encoding]
+
         try_encodings = [user_specified_encoding, document_declared_encoding]
-        dammit = UnicodeDammit(markup, try_encodings, is_html=True,
-                               exclude_encodings=exclude_encodings)
+        dammit = UnicodeDammit(
+            markup,
+            known_definite_encodings=known_definite_encodings,
+            user_encodings=user_encodings,
+            is_html=True,
+            exclude_encodings=exclude_encodings
+        )
         yield (dammit.markup, dammit.original_encoding,
                dammit.declared_html_encoding,
                dammit.contains_replacement_characters)
@@ -376,7 +391,7 @@
         try:
             parser.feed(markup)
             parser.close()
-        except HTMLParseError, e:
+        except HTMLParseError as e:
             warnings.warn(RuntimeWarning(
                 "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
             raise e
diff -Nru beautifulsoup4-4.9.3/bs4/builder/__init__.py beautifulsoup4-4.10.0/bs4/builder/__init__.py
--- beautifulsoup4-4.9.3/bs4/builder/__init__.py	2020-05-30 18:17:21.000000000 +0000
+++ beautifulsoup4-4.10.0/bs4/builder/__init__.py	2021-09-07 23:36:46.000000000 +0000
@@ -234,7 +234,8 @@
         :param markup: Some markup -- probably a bytestring.
         :param user_specified_encoding: The user asked to try this encoding.
         :param document_declared_encoding: The markup itself claims to be
-            in this encoding.
+            in this encoding. NOTE: This argument is not used by the
+            calling code and can probably be removed.
         :param exclude_encodings: The user asked _not_ to try any of
             these encodings.
 
@@ -300,13 +301,13 @@
             universal = self.cdata_list_attributes.get('*', [])
             tag_specific = self.cdata_list_attributes.get(
                 tag_name.lower(), None)
-            for attr in attrs.keys():
+            for attr in list(attrs.keys()):
                 if attr in universal or (tag_specific and attr in tag_specific):
                     # We have a "class"-type attribute whose string
                     # value is a whitespace-separated list of
                     # values. Split it into a list.
                     value = attrs[attr]
-                    if isinstance(value, basestring):
+                    if isinstance(value, str):
                         values = nonwhitespace_re.findall(value)
                     else:
                         # html5lib sometimes calls setAttributes twice
@@ -496,7 +497,7 @@
         """
         if isinstance(message_or_exception, Exception):
             e = message_or_exception
-            message_or_exception = "%s: %s" % (e.__class__.__name__, unicode(e))
+            message_or_exception = "%s: %s" % (e.__class__.__name__, str(e))
         super(ParserRejectedMarkup, self).__init__(message_or_exception)
             
 # Builders are registered in reverse order of priority, so that custom
diff -Nru beautifulsoup4-4.9.3/bs4/builder/_lxml.py beautifulsoup4-4.10.0/bs4/builder/_lxml.py
--- beautifulsoup4-4.9.3/bs4/builder/_lxml.py	2020-09-07 11:13:41.000000000 +0000
+++ beautifulsoup4-4.10.0/bs4/builder/_lxml.py	2021-09-07 23:36:46.000000000 +0000
@@ -8,11 +8,11 @@
 
 try:
     from collections.abc import Callable # Python 3.6
-except ImportError , e:
+except ImportError as e:
     from collections import Callable
 
 from io import BytesIO
-from StringIO import StringIO
+from io import StringIO
 from lxml import etree
 from bs4.element import (
     Comment,
@@ -35,7 +35,7 @@
 
 def _invert(d):
     "Invert a dictionary."
-    return dict((v,k) for k, v in d.items())
+    return dict((v,k) for k, v in list(d.items()))
 
 class LXMLTreeBuilderForXML(TreeBuilder):
     DEFAULT_PARSER_CLASS = etree.XMLParser
@@ -81,7 +81,7 @@
 
         :param mapping: A dictionary mapping namespace prefixes to URIs.
         """
-        for key, value in mapping.items():
+        for key, value in list(mapping.items()):
             if key and key not in self.soup._namespaces:
                 # Let the BeautifulSoup object know about a new namespace.
                 # If there are multiple namespaces defined with the same
@@ -169,27 +169,37 @@
         else:
             self.processing_instruction_class = XMLProcessingInstruction
 
-        if isinstance(markup, unicode):
+        if isinstance(markup, str):
             # We were given Unicode. Maybe lxml can parse Unicode on
             # this system?
             yield markup, None, document_declared_encoding, False
 
-        if isinstance(markup, unicode):
+        if isinstance(markup, str):
             # No, apparently not. Convert the Unicode to UTF-8 and
             # tell lxml to parse it as UTF-8.
             yield (markup.encode("utf8"), "utf8",
                    document_declared_encoding, False)
 
-        try_encodings = [user_specified_encoding, document_declared_encoding]
+        # This was provided by the end-user; treat it as a known
+        # definite encoding per the algorithm laid out in the HTML5
+        # spec.  (See the EncodingDetector class for details.)
+        known_definite_encodings = [user_specified_encoding]
+
+        # This was found in the document; treat it as a slightly lower-priority
+        # user encoding.
+        user_encodings = [document_declared_encoding]
         detector = EncodingDetector(
-            markup, try_encodings, is_html, exclude_encodings)
+            markup, known_definite_encodings=known_definite_encodings,
+            user_encodings=user_encodings, is_html=is_html,
+            exclude_encodings=exclude_encodings
+        )
         for encoding in detector.encodings:
             yield (detector.markup, encoding, document_declared_encoding, False)
 
     def feed(self, markup):
         if isinstance(markup, bytes):
             markup = BytesIO(markup)
-        elif isinstance(markup, unicode):
+        elif isinstance(markup, str):
             markup = StringIO(markup)
 
         # Call feed() at least once, even if the markup is empty,
@@ -204,7 +214,7 @@
                 if len(data) != 0:
                     self.parser.feed(data)
             self.parser.close()
-        except (UnicodeDecodeError, LookupError, etree.ParserError), e:
+        except (UnicodeDecodeError, LookupError, etree.ParserError) as e:
             raise ParserRejectedMarkup(e)
 
     def close(self):
@@ -233,7 +243,7 @@
             # Also treat the namespace mapping as a set of attributes on the
             # tag, so we can recreate it later.
             attrs = attrs.copy()
-            for prefix, namespace in nsmap.items():
+            for prefix, namespace in list(nsmap.items()):
                 attribute = NamespacedAttribute(
                     "xmlns", prefix, "http://www.w3.org/2000/xmlns/")
                 attrs[attribute] = namespace
@@ -242,7 +252,7 @@
         # from lxml with namespaces attached to their names, and
         # turn then into NamespacedAttribute objects.
         new_attrs = {}
-        for attr, value in attrs.items():
+        for attr, value in list(attrs.items()):
             namespace, attr = self._getNsTag(attr)
             if namespace is None:
                 new_attrs[attr] = value
@@ -302,7 +312,7 @@
 
     def test_fragment_to_document(self, fragment):
         """See `TreeBuilder`."""
-        return u'<?xml version="1.0" encoding="utf-8"?>\n%s' % fragment
+        return '<?xml version="1.0" encoding="utf-8"?>\n%s' % fragment
 
 
 class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
@@ -323,10 +333,10 @@
             self.parser = self.parser_for(encoding)
             self.parser.feed(markup)
             self.parser.close()
-        except (UnicodeDecodeError, LookupError, etree.ParserError), e:
+        except (UnicodeDecodeError, LookupError, etree.ParserError) as e:
             raise ParserRejectedMarkup(e)
 
 
     def test_fragment_to_document(self, fragment):
         """See `TreeBuilder`."""
-        return u'<html><body>%s</body></html>' % fragment
+        return '<html><body>%s</body></html>' % fragment
diff -Nru beautifulsoup4-4.9.3/bs4/dammit.py beautifulsoup4-4.10.0/bs4/dammit.py
--- beautifulsoup4-4.9.3/bs4/dammit.py	2020-05-17 17:56:04.000000000 +0000
+++ beautifulsoup4-4.10.0/bs4/dammit.py	2021-09-07 23:36:46.000000000 +0000
@@ -9,8 +9,9 @@
 # Use of this source code is governed by the MIT license.
 __license__ = "MIT"
 
+from html.entities import codepoint2name
+from collections import defaultdict
 import codecs
-from htmlentitydefs import codepoint2name
 import re
 import logging
 import string
@@ -22,7 +23,7 @@
     #  PyPI package: cchardet
     import cchardet
     def chardet_dammit(s):
-        if isinstance(s, unicode):
+        if isinstance(s, str):
             return None
         return cchardet.detect(s)['encoding']
 except ImportError:
@@ -32,7 +33,7 @@
         #  PyPI package: chardet
         import chardet
         def chardet_dammit(s):
-            if isinstance(s, unicode):
+            if isinstance(s, str):
                 return None
             return chardet.detect(s)['encoding']
         #import chardet.constants
@@ -53,46 +54,2380 @@
 
 # Build bytestring and Unicode versions of regular expressions for finding
 # a declared encoding inside an XML or HTML document.
-xml_encoding = u'^\\s*<\\?.*encoding=[\'"](.*?)[\'"].*\\?>'
-html_meta = u'<\\s*meta[^>]+charset\\s*=\\s*["\']?([^>]*?)[ /;\'">]'
+xml_encoding = '^\\s*<\\?.*encoding=[\'"](.*?)[\'"].*\\?>'
+html_meta = '<\\s*meta[^>]+charset\\s*=\\s*["\']?([^>]*?)[ /;\'">]'
 encoding_res = dict()
 encoding_res[bytes] = {
     'html' : re.compile(html_meta.encode("ascii"), re.I),
     'xml' : re.compile(xml_encoding.encode("ascii"), re.I),
 }
-encoding_res[unicode] = {
+encoding_res[str] = {
     'html' : re.compile(html_meta, re.I),
     'xml' : re.compile(xml_encoding, re.I)
 }
 
+try:
+    from html.entities import html5
+except ImportError:
+    # This is a copy of html.entities.html5 from Python 3.9. There's
+    # no equivalent table in Python 2, so we'll just provide a copy
+    # here.
+    html5 = {
+    'Aacute': '\xc1',
+    'aacute': '\xe1',
+    'Aacute;': '\xc1',
+    'aacute;': '\xe1',
+    'Abreve;': '\u0102',
+    'abreve;': '\u0103',
+    'ac;': '\u223e',
+    'acd;': '\u223f',
+    'acE;': '\u223e\u0333',
+    'Acirc': '\xc2',
+    'acirc': '\xe2',
+    'Acirc;': '\xc2',
+    'acirc;': '\xe2',
+    'acute': '\xb4',
+    'acute;': '\xb4',
+    'Acy;': '\u0410',
+    'acy;': '\u0430',
+    'AElig': '\xc6',
+    'aelig': '\xe6',
+    'AElig;': '\xc6',
+    'aelig;': '\xe6',
+    'af;': '\u2061',
+    'Afr;': '\U0001d504',
+    'afr;': '\U0001d51e',
+    'Agrave': '\xc0',
+    'agrave': '\xe0',
+    'Agrave;': '\xc0',
+    'agrave;': '\xe0',
+    'alefsym;': '\u2135',
+    'aleph;': '\u2135',
+    'Alpha;': '\u0391',
+    'alpha;': '\u03b1',
+    'Amacr;': '\u0100',
+    'amacr;': '\u0101',
+    'amalg;': '\u2a3f',
+    'AMP': '&',
+    'amp': '&',
+    'AMP;': '&',
+    'amp;': '&',
+    'And;': '\u2a53',
+    'and;': '\u2227',
+    'andand;': '\u2a55',
+    'andd;': '\u2a5c',
+    'andslope;': '\u2a58',
+    'andv;': '\u2a5a',
+    'ang;': '\u2220',
+    'ange;': '\u29a4',
+    'angle;': '\u2220',
+    'angmsd;': '\u2221',
+    'angmsdaa;': '\u29a8',
+    'angmsdab;': '\u29a9',
+    'angmsdac;': '\u29aa',
+    'angmsdad;': '\u29ab',
+    'angmsdae;': '\u29ac',
+    'angmsdaf;': '\u29ad',
+    'angmsdag;': '\u29ae',
+    'angmsdah;': '\u29af',
+    'angrt;': '\u221f',
+    'angrtvb;': '\u22be',
+    'angrtvbd;': '\u299d',
+    'angsph;': '\u2222',
+    'angst;': '\xc5',
+    'angzarr;': '\u237c',
+    'Aogon;': '\u0104',
+    'aogon;': '\u0105',
+    'Aopf;': '\U0001d538',
+    'aopf;': '\U0001d552',
+    'ap;': '\u2248',
+    'apacir;': '\u2a6f',
+    'apE;': '\u2a70',
+    'ape;': '\u224a',
+    'apid;': '\u224b',
+    'apos;': "'",
+    'ApplyFunction;': '\u2061',
+    'approx;': '\u2248',
+    'approxeq;': '\u224a',
+    'Aring': '\xc5',
+    'aring': '\xe5',
+    'Aring;': '\xc5',
+    'aring;': '\xe5',
+    'Ascr;': '\U0001d49c',
+    'ascr;': '\U0001d4b6',
+    'Assign;': '\u2254',
+    'ast;': '*',
+    'asymp;': '\u2248',
+    'asympeq;': '\u224d',
+    'Atilde': '\xc3',
+    'atilde': '\xe3',
+    'Atilde;': '\xc3',
+    'atilde;': '\xe3',
+    'Auml': '\xc4',
+    'auml': '\xe4',
+    'Auml;': '\xc4',
+    'auml;': '\xe4',
+    'awconint;': '\u2233',
+    'awint;': '\u2a11',
+    'backcong;': '\u224c',
+    'backepsilon;': '\u03f6',
+    'backprime;': '\u2035',
+    'backsim;': '\u223d',
+    'backsimeq;': '\u22cd',
+    'Backslash;': '\u2216',
+    'Barv;': '\u2ae7',
+    'barvee;': '\u22bd',
+    'Barwed;': '\u2306',
+    'barwed;': '\u2305',
+    'barwedge;': '\u2305',
+    'bbrk;': '\u23b5',
+    'bbrktbrk;': '\u23b6',
+    'bcong;': '\u224c',
+    'Bcy;': '\u0411',
+    'bcy;': '\u0431',
+    'bdquo;': '\u201e',
+    'becaus;': '\u2235',
+    'Because;': '\u2235',
+    'because;': '\u2235',
+    'bemptyv;': '\u29b0',
+    'bepsi;': '\u03f6',
+    'bernou;': '\u212c',
+    'Bernoullis;': '\u212c',
+    'Beta;': '\u0392',
+    'beta;': '\u03b2',
+    'beth;': '\u2136',
+    'between;': '\u226c',
+    'Bfr;': '\U0001d505',
+    'bfr;': '\U0001d51f',
+    'bigcap;': '\u22c2',
+    'bigcirc;': '\u25ef',
+    'bigcup;': '\u22c3',
+    'bigodot;': '\u2a00',
+    'bigoplus;': '\u2a01',
+    'bigotimes;': '\u2a02',
+    'bigsqcup;': '\u2a06',
+    'bigstar;': '\u2605',
+    'bigtriangledown;': '\u25bd',
+    'bigtriangleup;': '\u25b3',
+    'biguplus;': '\u2a04',
+    'bigvee;': '\u22c1',
+    'bigwedge;': '\u22c0',
+    'bkarow;': '\u290d',
+    'blacklozenge;': '\u29eb',
+    'blacksquare;': '\u25aa',
+    'blacktriangle;': '\u25b4',
+    'blacktriangledown;': '\u25be',
+    'blacktriangleleft;': '\u25c2',
+    'blacktriangleright;': '\u25b8',
+    'blank;': '\u2423',
+    'blk12;': '\u2592',
+    'blk14;': '\u2591',
+    'blk34;': '\u2593',
+    'block;': '\u2588',
+    'bne;': '=\u20e5',
+    'bnequiv;': '\u2261\u20e5',
+    'bNot;': '\u2aed',
+    'bnot;': '\u2310',
+    'Bopf;': '\U0001d539',
+    'bopf;': '\U0001d553',
+    'bot;': '\u22a5',
+    'bottom;': '\u22a5',
+    'bowtie;': '\u22c8',
+    'boxbox;': '\u29c9',
+    'boxDL;': '\u2557',
+    'boxDl;': '\u2556',
+    'boxdL;': '\u2555',
+    'boxdl;': '\u2510',
+    'boxDR;': '\u2554',
+    'boxDr;': '\u2553',
+    'boxdR;': '\u2552',
+    'boxdr;': '\u250c',
+    'boxH;': '\u2550',
+    'boxh;': '\u2500',
+    'boxHD;': '\u2566',
+    'boxHd;': '\u2564',
+    'boxhD;': '\u2565',
+    'boxhd;': '\u252c',
+    'boxHU;': '\u2569',
+    'boxHu;': '\u2567',
+    'boxhU;': '\u2568',
+    'boxhu;': '\u2534',
+    'boxminus;': '\u229f',
+    'boxplus;': '\u229e',
+    'boxtimes;': '\u22a0',
+    'boxUL;': '\u255d',
+    'boxUl;': '\u255c',
+    'boxuL;': '\u255b',
+    'boxul;': '\u2518',
+    'boxUR;': '\u255a',
+    'boxUr;': '\u2559',
+    'boxuR;': '\u2558',
+    'boxur;': '\u2514',
+    'boxV;': '\u2551',
+    'boxv;': '\u2502',
+    'boxVH;': '\u256c',
+    'boxVh;': '\u256b',
+    'boxvH;': '\u256a',
+    'boxvh;': '\u253c',
+    'boxVL;': '\u2563',
+    'boxVl;': '\u2562',
+    'boxvL;': '\u2561',
+    'boxvl;': '\u2524',
+    'boxVR;': '\u2560',
+    'boxVr;': '\u255f',
+    'boxvR;': '\u255e',
+    'boxvr;': '\u251c',
+    'bprime;': '\u2035',
+    'Breve;': '\u02d8',
+    'breve;': '\u02d8',
+    'brvbar': '\xa6',
+    'brvbar;': '\xa6',
+    'Bscr;': '\u212c',
+    'bscr;': '\U0001d4b7',
+    'bsemi;': '\u204f',
+    'bsim;': '\u223d',
+    'bsime;': '\u22cd',
+    'bsol;': '\\',
+    'bsolb;': '\u29c5',
+    'bsolhsub;': '\u27c8',
+    'bull;': '\u2022',
+    'bullet;': '\u2022',
+    'bump;': '\u224e',
+    'bumpE;': '\u2aae',
+    'bumpe;': '\u224f',
+    'Bumpeq;': '\u224e',
+    'bumpeq;': '\u224f',
+    'Cacute;': '\u0106',
+    'cacute;': '\u0107',
+    'Cap;': '\u22d2',
+    'cap;': '\u2229',
+    'capand;': '\u2a44',
+    'capbrcup;': '\u2a49',
+    'capcap;': '\u2a4b',
+    'capcup;': '\u2a47',
+    'capdot;': '\u2a40',
+    'CapitalDifferentialD;': '\u2145',
+    'caps;': '\u2229\ufe00',
+    'caret;': '\u2041',
+    'caron;': '\u02c7',
+    'Cayleys;': '\u212d',
+    'ccaps;': '\u2a4d',
+    'Ccaron;': '\u010c',
+    'ccaron;': '\u010d',
+    'Ccedil': '\xc7',
+    'ccedil': '\xe7',
+    'Ccedil;': '\xc7',
+    'ccedil;': '\xe7',
+    'Ccirc;': '\u0108',
+    'ccirc;': '\u0109',
+    'Cconint;': '\u2230',
+    'ccups;': '\u2a4c',
+    'ccupssm;': '\u2a50',
+    'Cdot;': '\u010a',
+    'cdot;': '\u010b',
+    'cedil': '\xb8',
+    'cedil;': '\xb8',
+    'Cedilla;': '\xb8',
+    'cemptyv;': '\u29b2',
+    'cent': '\xa2',
+    'cent;': '\xa2',
+    'CenterDot;': '\xb7',
+    'centerdot;': '\xb7',
+    'Cfr;': '\u212d',
+    'cfr;': '\U0001d520',
+    'CHcy;': '\u0427',
+    'chcy;': '\u0447',
+    'check;': '\u2713',
+    'checkmark;': '\u2713',
+    'Chi;': '\u03a7',
+    'chi;': '\u03c7',
+    'cir;': '\u25cb',
+    'circ;': '\u02c6',
+    'circeq;': '\u2257',
+    'circlearrowleft;': '\u21ba',
+    'circlearrowright;': '\u21bb',
+    'circledast;': '\u229b',
+    'circledcirc;': '\u229a',
+    'circleddash;': '\u229d',
+    'CircleDot;': '\u2299',
+    'circledR;': '\xae',
+    'circledS;': '\u24c8',
+    'CircleMinus;': '\u2296',
+    'CirclePlus;': '\u2295',
+    'CircleTimes;': '\u2297',
+    'cirE;': '\u29c3',
+    'cire;': '\u2257',
+    'cirfnint;': '\u2a10',
+    'cirmid;': '\u2aef',
+    'cirscir;': '\u29c2',
+    'ClockwiseContourIntegral;': '\u2232',
+    'CloseCurlyDoubleQuote;': '\u201d',
+    'CloseCurlyQuote;': '\u2019',
+    'clubs;': '\u2663',
+    'clubsuit;': '\u2663',
+    'Colon;': '\u2237',
+    'colon;': ':',
+    'Colone;': '\u2a74',
+    'colone;': '\u2254',
+    'coloneq;': '\u2254',
+    'comma;': ',',
+    'commat;': '@',
+    'comp;': '\u2201',
+    'compfn;': '\u2218',
+    'complement;': '\u2201',
+    'complexes;': '\u2102',
+    'cong;': '\u2245',
+    'congdot;': '\u2a6d',
+    'Congruent;': '\u2261',
+    'Conint;': '\u222f',
+    'conint;': '\u222e',
+    'ContourIntegral;': '\u222e',
+    'Copf;': '\u2102',
+    'copf;': '\U0001d554',
+    'coprod;': '\u2210',
+    'Coproduct;': '\u2210',
+    'COPY': '\xa9',
+    'copy': '\xa9',
+    'COPY;': '\xa9',
+    'copy;': '\xa9',
+    'copysr;': '\u2117',
+    'CounterClockwiseContourIntegral;': '\u2233',
+    'crarr;': '\u21b5',
+    'Cross;': '\u2a2f',
+    'cross;': '\u2717',
+    'Cscr;': '\U0001d49e',
+    'cscr;': '\U0001d4b8',
+    'csub;': '\u2acf',
+    'csube;': '\u2ad1',
+    'csup;': '\u2ad0',
+    'csupe;': '\u2ad2',
+    'ctdot;': '\u22ef',
+    'cudarrl;': '\u2938',
+    'cudarrr;': '\u2935',
+    'cuepr;': '\u22de',
+    'cuesc;': '\u22df',
+    'cularr;': '\u21b6',
+    'cularrp;': '\u293d',
+    'Cup;': '\u22d3',
+    'cup;': '\u222a',
+    'cupbrcap;': '\u2a48',
+    'CupCap;': '\u224d',
+    'cupcap;': '\u2a46',
+    'cupcup;': '\u2a4a',
+    'cupdot;': '\u228d',
+    'cupor;': '\u2a45',
+    'cups;': '\u222a\ufe00',
+    'curarr;': '\u21b7',
+    'curarrm;': '\u293c',
+    'curlyeqprec;': '\u22de',
+    'curlyeqsucc;': '\u22df',
+    'curlyvee;': '\u22ce',
+    'curlywedge;': '\u22cf',
+    'curren': '\xa4',
+    'curren;': '\xa4',
+    'curvearrowleft;': '\u21b6',
+    'curvearrowright;': '\u21b7',
+    'cuvee;': '\u22ce',
+    'cuwed;': '\u22cf',
+    'cwconint;': '\u2232',
+    'cwint;': '\u2231',
+    'cylcty;': '\u232d',
+    'Dagger;': '\u2021',
+    'dagger;': '\u2020',
+    'daleth;': '\u2138',
+    'Darr;': '\u21a1',
+    'dArr;': '\u21d3',
+    'darr;': '\u2193',
+    'dash;': '\u2010',
+    'Dashv;': '\u2ae4',
+    'dashv;': '\u22a3',
+    'dbkarow;': '\u290f',
+    'dblac;': '\u02dd',
+    'Dcaron;': '\u010e',
+    'dcaron;': '\u010f',
+    'Dcy;': '\u0414',
+    'dcy;': '\u0434',
+    'DD;': '\u2145',
+    'dd;': '\u2146',
+    'ddagger;': '\u2021',
+    'ddarr;': '\u21ca',
+    'DDotrahd;': '\u2911',
+    'ddotseq;': '\u2a77',
+    'deg': '\xb0',
+    'deg;': '\xb0',
+    'Del;': '\u2207',
+    'Delta;': '\u0394',
+    'delta;': '\u03b4',
+    'demptyv;': '\u29b1',
+    'dfisht;': '\u297f',
+    'Dfr;': '\U0001d507',
+    'dfr;': '\U0001d521',
+    'dHar;': '\u2965',
+    'dharl;': '\u21c3',
+    'dharr;': '\u21c2',
+    'DiacriticalAcute;': '\xb4',
+    'DiacriticalDot;': '\u02d9',
+    'DiacriticalDoubleAcute;': '\u02dd',
+    'DiacriticalGrave;': '`',
+    'DiacriticalTilde;': '\u02dc',
+    'diam;': '\u22c4',
+    'Diamond;': '\u22c4',
+    'diamond;': '\u22c4',
+    'diamondsuit;': '\u2666',
+    'diams;': '\u2666',
+    'die;': '\xa8',
+    'DifferentialD;': '\u2146',
+    'digamma;': '\u03dd',
+    'disin;': '\u22f2',
+    'div;': '\xf7',
+    'divide': '\xf7',
+    'divide;': '\xf7',
+    'divideontimes;': '\u22c7',
+    'divonx;': '\u22c7',
+    'DJcy;': '\u0402',
+    'djcy;': '\u0452',
+    'dlcorn;': '\u231e',
+    'dlcrop;': '\u230d',
+    'dollar;': '$',
+    'Dopf;': '\U0001d53b',
+    'dopf;': '\U0001d555',
+    'Dot;': '\xa8',
+    'dot;': '\u02d9',
+    'DotDot;': '\u20dc',
+    'doteq;': '\u2250',
+    'doteqdot;': '\u2251',
+    'DotEqual;': '\u2250',
+    'dotminus;': '\u2238',
+    'dotplus;': '\u2214',
+    'dotsquare;': '\u22a1',
+    'doublebarwedge;': '\u2306',
+    'DoubleContourIntegral;': '\u222f',
+    'DoubleDot;': '\xa8',
+    'DoubleDownArrow;': '\u21d3',
+    'DoubleLeftArrow;': '\u21d0',
+    'DoubleLeftRightArrow;': '\u21d4',
+    'DoubleLeftTee;': '\u2ae4',
+    'DoubleLongLeftArrow;': '\u27f8',
+    'DoubleLongLeftRightArrow;': '\u27fa',
+    'DoubleLongRightArrow;': '\u27f9',
+    'DoubleRightArrow;': '\u21d2',
+    'DoubleRightTee;': '\u22a8',
+    'DoubleUpArrow;': '\u21d1',
+    'DoubleUpDownArrow;': '\u21d5',
+    'DoubleVerticalBar;': '\u2225',
+    'DownArrow;': '\u2193',
+    'Downarrow;': '\u21d3',
+    'downarrow;': '\u2193',
+    'DownArrowBar;': '\u2913',
+    'DownArrowUpArrow;': '\u21f5',
+    'DownBreve;': '\u0311',
+    'downdownarrows;': '\u21ca',
+    'downharpoonleft;': '\u21c3',
+    'downharpoonright;': '\u21c2',
+    'DownLeftRightVector;': '\u2950',
+    'DownLeftTeeVector;': '\u295e',
+    'DownLeftVector;': '\u21bd',
+    'DownLeftVectorBar;': '\u2956',
+    'DownRightTeeVector;': '\u295f',
+    'DownRightVector;': '\u21c1',
+    'DownRightVectorBar;': '\u2957',
+    'DownTee;': '\u22a4',
+    'DownTeeArrow;': '\u21a7',
+    'drbkarow;': '\u2910',
+    'drcorn;': '\u231f',
+    'drcrop;': '\u230c',
+    'Dscr;': '\U0001d49f',
+    'dscr;': '\U0001d4b9',
+    'DScy;': '\u0405',
+    'dscy;': '\u0455',
+    'dsol;': '\u29f6',
+    'Dstrok;': '\u0110',
+    'dstrok;': '\u0111',
+    'dtdot;': '\u22f1',
+    'dtri;': '\u25bf',
+    'dtrif;': '\u25be',
+    'duarr;': '\u21f5',
+    'duhar;': '\u296f',
+    'dwangle;': '\u29a6',
+    'DZcy;': '\u040f',
+    'dzcy;': '\u045f',
+    'dzigrarr;': '\u27ff',
+    'Eacute': '\xc9',
+    'eacute': '\xe9',
+    'Eacute;': '\xc9',
+    'eacute;': '\xe9',
+    'easter;': '\u2a6e',
+    'Ecaron;': '\u011a',
+    'ecaron;': '\u011b',
+    'ecir;': '\u2256',
+    'Ecirc': '\xca',
+    'ecirc': '\xea',
+    'Ecirc;': '\xca',
+    'ecirc;': '\xea',
+    'ecolon;': '\u2255',
+    'Ecy;': '\u042d',
+    'ecy;': '\u044d',
+    'eDDot;': '\u2a77',
+    'Edot;': '\u0116',
+    'eDot;': '\u2251',
+    'edot;': '\u0117',
+    'ee;': '\u2147',
+    'efDot;': '\u2252',
+    'Efr;': '\U0001d508',
+    'efr;': '\U0001d522',
+    'eg;': '\u2a9a',
+    'Egrave': '\xc8',
+    'egrave': '\xe8',
+    'Egrave;': '\xc8',
+    'egrave;': '\xe8',
+    'egs;': '\u2a96',
+    'egsdot;': '\u2a98',
+    'el;': '\u2a99',
+    'Element;': '\u2208',
+    'elinters;': '\u23e7',
+    'ell;': '\u2113',
+    'els;': '\u2a95',
+    'elsdot;': '\u2a97',
+    'Emacr;': '\u0112',
+    'emacr;': '\u0113',
+    'empty;': '\u2205',
+    'emptyset;': '\u2205',
+    'EmptySmallSquare;': '\u25fb',
+    'emptyv;': '\u2205',
+    'EmptyVerySmallSquare;': '\u25ab',
+    'emsp13;': '\u2004',
+    'emsp14;': '\u2005',
+    'emsp;': '\u2003',
+    'ENG;': '\u014a',
+    'eng;': '\u014b',
+    'ensp;': '\u2002',
+    'Eogon;': '\u0118',
+    'eogon;': '\u0119',
+    'Eopf;': '\U0001d53c',
+    'eopf;': '\U0001d556',
+    'epar;': '\u22d5',
+    'eparsl;': '\u29e3',
+    'eplus;': '\u2a71',
+    'epsi;': '\u03b5',
+    'Epsilon;': '\u0395',
+    'epsilon;': '\u03b5',
+    'epsiv;': '\u03f5',
+    'eqcirc;': '\u2256',
+    'eqcolon;': '\u2255',
+    'eqsim;': '\u2242',
+    'eqslantgtr;': '\u2a96',
+    'eqslantless;': '\u2a95',
+    'Equal;': '\u2a75',
+    'equals;': '=',
+    'EqualTilde;': '\u2242',
+    'equest;': '\u225f',
+    'Equilibrium;': '\u21cc',
+    'equiv;': '\u2261',
+    'equivDD;': '\u2a78',
+    'eqvparsl;': '\u29e5',
+    'erarr;': '\u2971',
+    'erDot;': '\u2253',
+    'Escr;': '\u2130',
+    'escr;': '\u212f',
+    'esdot;': '\u2250',
+    'Esim;': '\u2a73',
+    'esim;': '\u2242',
+    'Eta;': '\u0397',
+    'eta;': '\u03b7',
+    'ETH': '\xd0',
+    'eth': '\xf0',
+    'ETH;': '\xd0',
+    'eth;': '\xf0',
+    'Euml': '\xcb',
+    'euml': '\xeb',
+    'Euml;': '\xcb',
+    'euml;': '\xeb',
+    'euro;': '\u20ac',
+    'excl;': '!',
+    'exist;': '\u2203',
+    'Exists;': '\u2203',
+    'expectation;': '\u2130',
+    'ExponentialE;': '\u2147',
+    'exponentiale;': '\u2147',
+    'fallingdotseq;': '\u2252',
+    'Fcy;': '\u0424',
+    'fcy;': '\u0444',
+    'female;': '\u2640',
+    'ffilig;': '\ufb03',
+    'fflig;': '\ufb00',
+    'ffllig;': '\ufb04',
+    'Ffr;': '\U0001d509',
+    'ffr;': '\U0001d523',
+    'filig;': '\ufb01',
+    'FilledSmallSquare;': '\u25fc',
+    'FilledVerySmallSquare;': '\u25aa',
+    'fjlig;': 'fj',
+    'flat;': '\u266d',
+    'fllig;': '\ufb02',
+    'fltns;': '\u25b1',
+    'fnof;': '\u0192',
+    'Fopf;': '\U0001d53d',
+    'fopf;': '\U0001d557',
+    'ForAll;': '\u2200',
+    'forall;': '\u2200',
+    'fork;': '\u22d4',
+    'forkv;': '\u2ad9',
+    'Fouriertrf;': '\u2131',
+    'fpartint;': '\u2a0d',
+    'frac12': '\xbd',
+    'frac12;': '\xbd',
+    'frac13;': '\u2153',
+    'frac14': '\xbc',
+    'frac14;': '\xbc',
+    'frac15;': '\u2155',
+    'frac16;': '\u2159',
+    'frac18;': '\u215b',
+    'frac23;': '\u2154',
+    'frac25;': '\u2156',
+    'frac34': '\xbe',
+    'frac34;': '\xbe',
+    'frac35;': '\u2157',
+    'frac38;': '\u215c',
+    'frac45;': '\u2158',
+    'frac56;': '\u215a',
+    'frac58;': '\u215d',
+    'frac78;': '\u215e',
+    'frasl;': '\u2044',
+    'frown;': '\u2322',
+    'Fscr;': '\u2131',
+    'fscr;': '\U0001d4bb',
+    'gacute;': '\u01f5',
+    'Gamma;': '\u0393',
+    'gamma;': '\u03b3',
+    'Gammad;': '\u03dc',
+    'gammad;': '\u03dd',
+    'gap;': '\u2a86',
+    'Gbreve;': '\u011e',
+    'gbreve;': '\u011f',
+    'Gcedil;': '\u0122',
+    'Gcirc;': '\u011c',
+    'gcirc;': '\u011d',
+    'Gcy;': '\u0413',
+    'gcy;': '\u0433',
+    'Gdot;': '\u0120',
+    'gdot;': '\u0121',
+    'gE;': '\u2267',
+    'ge;': '\u2265',
+    'gEl;': '\u2a8c',
+    'gel;': '\u22db',
+    'geq;': '\u2265',
+    'geqq;': '\u2267',
+    'geqslant;': '\u2a7e',
+    'ges;': '\u2a7e',
+    'gescc;': '\u2aa9',
+    'gesdot;': '\u2a80',
+    'gesdoto;': '\u2a82',
+    'gesdotol;': '\u2a84',
+    'gesl;': '\u22db\ufe00',
+    'gesles;': '\u2a94',
+    'Gfr;': '\U0001d50a',
+    'gfr;': '\U0001d524',
+    'Gg;': '\u22d9',
+    'gg;': '\u226b',
+    'ggg;': '\u22d9',
+    'gimel;': '\u2137',
+    'GJcy;': '\u0403',
+    'gjcy;': '\u0453',
+    'gl;': '\u2277',
+    'gla;': '\u2aa5',
+    'glE;': '\u2a92',
+    'glj;': '\u2aa4',
+    'gnap;': '\u2a8a',
+    'gnapprox;': '\u2a8a',
+    'gnE;': '\u2269',
+    'gne;': '\u2a88',
+    'gneq;': '\u2a88',
+    'gneqq;': '\u2269',
+    'gnsim;': '\u22e7',
+    'Gopf;': '\U0001d53e',
+    'gopf;': '\U0001d558',
+    'grave;': '`',
+    'GreaterEqual;': '\u2265',
+    'GreaterEqualLess;': '\u22db',
+    'GreaterFullEqual;': '\u2267',
+    'GreaterGreater;': '\u2aa2',
+    'GreaterLess;': '\u2277',
+    'GreaterSlantEqual;': '\u2a7e',
+    'GreaterTilde;': '\u2273',
+    'Gscr;': '\U0001d4a2',
+    'gscr;': '\u210a',
+    'gsim;': '\u2273',
+    'gsime;': '\u2a8e',
+    'gsiml;': '\u2a90',
+    'GT': '>',
+    'gt': '>',
+    'GT;': '>',
+    'Gt;': '\u226b',
+    'gt;': '>',
+    'gtcc;': '\u2aa7',
+    'gtcir;': '\u2a7a',
+    'gtdot;': '\u22d7',
+    'gtlPar;': '\u2995',
+    'gtquest;': '\u2a7c',
+    'gtrapprox;': '\u2a86',
+    'gtrarr;': '\u2978',
+    'gtrdot;': '\u22d7',
+    'gtreqless;': '\u22db',
+    'gtreqqless;': '\u2a8c',
+    'gtrless;': '\u2277',
+    'gtrsim;': '\u2273',
+    'gvertneqq;': '\u2269\ufe00',
+    'gvnE;': '\u2269\ufe00',
+    'Hacek;': '\u02c7',
+    'hairsp;': '\u200a',
+    'half;': '\xbd',
+    'hamilt;': '\u210b',
+    'HARDcy;': '\u042a',
+    'hardcy;': '\u044a',
+    'hArr;': '\u21d4',
+    'harr;': '\u2194',
+    'harrcir;': '\u2948',
+    'harrw;': '\u21ad',
+    'Hat;': '^',
+    'hbar;': '\u210f',
+    'Hcirc;': '\u0124',
+    'hcirc;': '\u0125',
+    'hearts;': '\u2665',
+    'heartsuit;': '\u2665',
+    'hellip;': '\u2026',
+    'hercon;': '\u22b9',
+    'Hfr;': '\u210c',
+    'hfr;': '\U0001d525',
+    'HilbertSpace;': '\u210b',
+    'hksearow;': '\u2925',
+    'hkswarow;': '\u2926',
+    'hoarr;': '\u21ff',
+    'homtht;': '\u223b',
+    'hookleftarrow;': '\u21a9',
+    'hookrightarrow;': '\u21aa',
+    'Hopf;': '\u210d',
+    'hopf;': '\U0001d559',
+    'horbar;': '\u2015',
+    'HorizontalLine;': '\u2500',
+    'Hscr;': '\u210b',
+    'hscr;': '\U0001d4bd',
+    'hslash;': '\u210f',
+    'Hstrok;': '\u0126',
+    'hstrok;': '\u0127',
+    'HumpDownHump;': '\u224e',
+    'HumpEqual;': '\u224f',
+    'hybull;': '\u2043',
+    'hyphen;': '\u2010',
+    'Iacute': '\xcd',
+    'iacute': '\xed',
+    'Iacute;': '\xcd',
+    'iacute;': '\xed',
+    'ic;': '\u2063',
+    'Icirc': '\xce',
+    'icirc': '\xee',
+    'Icirc;': '\xce',
+    'icirc;': '\xee',
+    'Icy;': '\u0418',
+    'icy;': '\u0438',
+    'Idot;': '\u0130',
+    'IEcy;': '\u0415',
+    'iecy;': '\u0435',
+    'iexcl': '\xa1',
+    'iexcl;': '\xa1',
+    'iff;': '\u21d4',
+    'Ifr;': '\u2111',
+    'ifr;': '\U0001d526',
+    'Igrave': '\xcc',
+    'igrave': '\xec',
+    'Igrave;': '\xcc',
+    'igrave;': '\xec',
+    'ii;': '\u2148',
+    'iiiint;': '\u2a0c',
+    'iiint;': '\u222d',
+    'iinfin;': '\u29dc',
+    'iiota;': '\u2129',
+    'IJlig;': '\u0132',
+    'ijlig;': '\u0133',
+    'Im;': '\u2111',
+    'Imacr;': '\u012a',
+    'imacr;': '\u012b',
+    'image;': '\u2111',
+    'ImaginaryI;': '\u2148',
+    'imagline;': '\u2110',
+    'imagpart;': '\u2111',
+    'imath;': '\u0131',
+    'imof;': '\u22b7',
+    'imped;': '\u01b5',
+    'Implies;': '\u21d2',
+    'in;': '\u2208',
+    'incare;': '\u2105',
+    'infin;': '\u221e',
+    'infintie;': '\u29dd',
+    'inodot;': '\u0131',
+    'Int;': '\u222c',
+    'int;': '\u222b',
+    'intcal;': '\u22ba',
+    'integers;': '\u2124',
+    'Integral;': '\u222b',
+    'intercal;': '\u22ba',
+    'Intersection;': '\u22c2',
+    'intlarhk;': '\u2a17',
+    'intprod;': '\u2a3c',
+    'InvisibleComma;': '\u2063',
+    'InvisibleTimes;': '\u2062',
+    'IOcy;': '\u0401',
+    'iocy;': '\u0451',
+    'Iogon;': '\u012e',
+    'iogon;': '\u012f',
+    'Iopf;': '\U0001d540',
+    'iopf;': '\U0001d55a',
+    'Iota;': '\u0399',
+    'iota;': '\u03b9',
+    'iprod;': '\u2a3c',
+    'iquest': '\xbf',
+    'iquest;': '\xbf',
+    'Iscr;': '\u2110',
+    'iscr;': '\U0001d4be',
+    'isin;': '\u2208',
+    'isindot;': '\u22f5',
+    'isinE;': '\u22f9',
+    'isins;': '\u22f4',
+    'isinsv;': '\u22f3',
+    'isinv;': '\u2208',
+    'it;': '\u2062',
+    'Itilde;': '\u0128',
+    'itilde;': '\u0129',
+    'Iukcy;': '\u0406',
+    'iukcy;': '\u0456',
+    'Iuml': '\xcf',
+    'iuml': '\xef',
+    'Iuml;': '\xcf',
+    'iuml;': '\xef',
+    'Jcirc;': '\u0134',
+    'jcirc;': '\u0135',
+    'Jcy;': '\u0419',
+    'jcy;': '\u0439',
+    'Jfr;': '\U0001d50d',
+    'jfr;': '\U0001d527',
+    'jmath;': '\u0237',
+    'Jopf;': '\U0001d541',
+    'jopf;': '\U0001d55b',
+    'Jscr;': '\U0001d4a5',
+    'jscr;': '\U0001d4bf',
+    'Jsercy;': '\u0408',
+    'jsercy;': '\u0458',
+    'Jukcy;': '\u0404',
+    'jukcy;': '\u0454',
+    'Kappa;': '\u039a',
+    'kappa;': '\u03ba',
+    'kappav;': '\u03f0',
+    'Kcedil;': '\u0136',
+    'kcedil;': '\u0137',
+    'Kcy;': '\u041a',
+    'kcy;': '\u043a',
+    'Kfr;': '\U0001d50e',
+    'kfr;': '\U0001d528',
+    'kgreen;': '\u0138',
+    'KHcy;': '\u0425',
+    'khcy;': '\u0445',
+    'KJcy;': '\u040c',
+    'kjcy;': '\u045c',
+    'Kopf;': '\U0001d542',
+    'kopf;': '\U0001d55c',
+    'Kscr;': '\U0001d4a6',
+    'kscr;': '\U0001d4c0',
+    'lAarr;': '\u21da',
+    'Lacute;': '\u0139',
+    'lacute;': '\u013a',
+    'laemptyv;': '\u29b4',
+    'lagran;': '\u2112',
+    'Lambda;': '\u039b',
+    'lambda;': '\u03bb',
+    'Lang;': '\u27ea',
+    'lang;': '\u27e8',
+    'langd;': '\u2991',
+    'langle;': '\u27e8',
+    'lap;': '\u2a85',
+    'Laplacetrf;': '\u2112',
+    'laquo': '\xab',
+    'laquo;': '\xab',
+    'Larr;': '\u219e',
+    'lArr;': '\u21d0',
+    'larr;': '\u2190',
+    'larrb;': '\u21e4',
+    'larrbfs;': '\u291f',
+    'larrfs;': '\u291d',
+    'larrhk;': '\u21a9',
+    'larrlp;': '\u21ab',
+    'larrpl;': '\u2939',
+    'larrsim;': '\u2973',
+    'larrtl;': '\u21a2',
+    'lat;': '\u2aab',
+    'lAtail;': '\u291b',
+    'latail;': '\u2919',
+    'late;': '\u2aad',
+    'lates;': '\u2aad\ufe00',
+    'lBarr;': '\u290e',
+    'lbarr;': '\u290c',
+    'lbbrk;': '\u2772',
+    'lbrace;': '{',
+    'lbrack;': '[',
+    'lbrke;': '\u298b',
+    'lbrksld;': '\u298f',
+    'lbrkslu;': '\u298d',
+    'Lcaron;': '\u013d',
+    'lcaron;': '\u013e',
+    'Lcedil;': '\u013b',
+    'lcedil;': '\u013c',
+    'lceil;': '\u2308',
+    'lcub;': '{',
+    'Lcy;': '\u041b',
+    'lcy;': '\u043b',
+    'ldca;': '\u2936',
+    'ldquo;': '\u201c',
+    'ldquor;': '\u201e',
+    'ldrdhar;': '\u2967',
+    'ldrushar;': '\u294b',
+    'ldsh;': '\u21b2',
+    'lE;': '\u2266',
+    'le;': '\u2264',
+    'LeftAngleBracket;': '\u27e8',
+    'LeftArrow;': '\u2190',
+    'Leftarrow;': '\u21d0',
+    'leftarrow;': '\u2190',
+    'LeftArrowBar;': '\u21e4',
+    'LeftArrowRightArrow;': '\u21c6',
+    'leftarrowtail;': '\u21a2',
+    'LeftCeiling;': '\u2308',
+    'LeftDoubleBracket;': '\u27e6',
+    'LeftDownTeeVector;': '\u2961',
+    'LeftDownVector;': '\u21c3',
+    'LeftDownVectorBar;': '\u2959',
+    'LeftFloor;': '\u230a',
+    'leftharpoondown;': '\u21bd',
+    'leftharpoonup;': '\u21bc',
+    'leftleftarrows;': '\u21c7',
+    'LeftRightArrow;': '\u2194',
+    'Leftrightarrow;': '\u21d4',
+    'leftrightarrow;': '\u2194',
+    'leftrightarrows;': '\u21c6',
+    'leftrightharpoons;': '\u21cb',
+    'leftrightsquigarrow;': '\u21ad',
+    'LeftRightVector;': '\u294e',
+    'LeftTee;': '\u22a3',
+    'LeftTeeArrow;': '\u21a4',
+    'LeftTeeVector;': '\u295a',
+    'leftthreetimes;': '\u22cb',
+    'LeftTriangle;': '\u22b2',
+    'LeftTriangleBar;': '\u29cf',
+    'LeftTriangleEqual;': '\u22b4',
+    'LeftUpDownVector;': '\u2951',
+    'LeftUpTeeVector;': '\u2960',
+    'LeftUpVector;': '\u21bf',
+    'LeftUpVectorBar;': '\u2958',
+    'LeftVector;': '\u21bc',
+    'LeftVectorBar;': '\u2952',
+    'lEg;': '\u2a8b',
+    'leg;': '\u22da',
+    'leq;': '\u2264',
+    'leqq;': '\u2266',
+    'leqslant;': '\u2a7d',
+    'les;': '\u2a7d',
+    'lescc;': '\u2aa8',
+    'lesdot;': '\u2a7f',
+    'lesdoto;': '\u2a81',
+    'lesdotor;': '\u2a83',
+    'lesg;': '\u22da\ufe00',
+    'lesges;': '\u2a93',
+    'lessapprox;': '\u2a85',
+    'lessdot;': '\u22d6',
+    'lesseqgtr;': '\u22da',
+    'lesseqqgtr;': '\u2a8b',
+    'LessEqualGreater;': '\u22da',
+    'LessFullEqual;': '\u2266',
+    'LessGreater;': '\u2276',
+    'lessgtr;': '\u2276',
+    'LessLess;': '\u2aa1',
+    'lesssim;': '\u2272',
+    'LessSlantEqual;': '\u2a7d',
+    'LessTilde;': '\u2272',
+    'lfisht;': '\u297c',
+    'lfloor;': '\u230a',
+    'Lfr;': '\U0001d50f',
+    'lfr;': '\U0001d529',
+    'lg;': '\u2276',
+    'lgE;': '\u2a91',
+    'lHar;': '\u2962',
+    'lhard;': '\u21bd',
+    'lharu;': '\u21bc',
+    'lharul;': '\u296a',
+    'lhblk;': '\u2584',
+    'LJcy;': '\u0409',
+    'ljcy;': '\u0459',
+    'Ll;': '\u22d8',
+    'll;': '\u226a',
+    'llarr;': '\u21c7',
+    'llcorner;': '\u231e',
+    'Lleftarrow;': '\u21da',
+    'llhard;': '\u296b',
+    'lltri;': '\u25fa',
+    'Lmidot;': '\u013f',
+    'lmidot;': '\u0140',
+    'lmoust;': '\u23b0',
+    'lmoustache;': '\u23b0',
+    'lnap;': '\u2a89',
+    'lnapprox;': '\u2a89',
+    'lnE;': '\u2268',
+    'lne;': '\u2a87',
+    'lneq;': '\u2a87',
+    'lneqq;': '\u2268',
+    'lnsim;': '\u22e6',
+    'loang;': '\u27ec',
+    'loarr;': '\u21fd',
+    'lobrk;': '\u27e6',
+    'LongLeftArrow;': '\u27f5',
+    'Longleftarrow;': '\u27f8',
+    'longleftarrow;': '\u27f5',
+    'LongLeftRightArrow;': '\u27f7',
+    'Longleftrightarrow;': '\u27fa',
+    'longleftrightarrow;': '\u27f7',
+    'longmapsto;': '\u27fc',
+    'LongRightArrow;': '\u27f6',
+    'Longrightarrow;': '\u27f9',
+    'longrightarrow;': '\u27f6',
+    'looparrowleft;': '\u21ab',
+    'looparrowright;': '\u21ac',
+    'lopar;': '\u2985',
+    'Lopf;': '\U0001d543',
+    'lopf;': '\U0001d55d',
+    'loplus;': '\u2a2d',
+    'lotimes;': '\u2a34',
+    'lowast;': '\u2217',
+    'lowbar;': '_',
+    'LowerLeftArrow;': '\u2199',
+    'LowerRightArrow;': '\u2198',
+    'loz;': '\u25ca',
+    'lozenge;': '\u25ca',
+    'lozf;': '\u29eb',
+    'lpar;': '(',
+    'lparlt;': '\u2993',
+    'lrarr;': '\u21c6',
+    'lrcorner;': '\u231f',
+    'lrhar;': '\u21cb',
+    'lrhard;': '\u296d',
+    'lrm;': '\u200e',
+    'lrtri;': '\u22bf',
+    'lsaquo;': '\u2039',
+    'Lscr;': '\u2112',
+    'lscr;': '\U0001d4c1',
+    'Lsh;': '\u21b0',
+    'lsh;': '\u21b0',
+    'lsim;': '\u2272',
+    'lsime;': '\u2a8d',
+    'lsimg;': '\u2a8f',
+    'lsqb;': '[',
+    'lsquo;': '\u2018',
+    'lsquor;': '\u201a',
+    'Lstrok;': '\u0141',
+    'lstrok;': '\u0142',
+    'LT': '<',
+    'lt': '<',
+    'LT;': '<',
+    'Lt;': '\u226a',
+    'lt;': '<',
+    'ltcc;': '\u2aa6',
+    'ltcir;': '\u2a79',
+    'ltdot;': '\u22d6',
+    'lthree;': '\u22cb',
+    'ltimes;': '\u22c9',
+    'ltlarr;': '\u2976',
+    'ltquest;': '\u2a7b',
+    'ltri;': '\u25c3',
+    'ltrie;': '\u22b4',
+    'ltrif;': '\u25c2',
+    'ltrPar;': '\u2996',
+    'lurdshar;': '\u294a',
+    'luruhar;': '\u2966',
+    'lvertneqq;': '\u2268\ufe00',
+    'lvnE;': '\u2268\ufe00',
+    'macr': '\xaf',
+    'macr;': '\xaf',
+    'male;': '\u2642',
+    'malt;': '\u2720',
+    'maltese;': '\u2720',
+    'Map;': '\u2905',
+    'map;': '\u21a6',
+    'mapsto;': '\u21a6',
+    'mapstodown;': '\u21a7',
+    'mapstoleft;': '\u21a4',
+    'mapstoup;': '\u21a5',
+    'marker;': '\u25ae',
+    'mcomma;': '\u2a29',
+    'Mcy;': '\u041c',
+    'mcy;': '\u043c',
+    'mdash;': '\u2014',
+    'mDDot;': '\u223a',
+    'measuredangle;': '\u2221',
+    'MediumSpace;': '\u205f',
+    'Mellintrf;': '\u2133',
+    'Mfr;': '\U0001d510',
+    'mfr;': '\U0001d52a',
+    'mho;': '\u2127',
+    'micro': '\xb5',
+    'micro;': '\xb5',
+    'mid;': '\u2223',
+    'midast;': '*',
+    'midcir;': '\u2af0',
+    'middot': '\xb7',
+    'middot;': '\xb7',
+    'minus;': '\u2212',
+    'minusb;': '\u229f',
+    'minusd;': '\u2238',
+    'minusdu;': '\u2a2a',
+    'MinusPlus;': '\u2213',
+    'mlcp;': '\u2adb',
+    'mldr;': '\u2026',
+    'mnplus;': '\u2213',
+    'models;': '\u22a7',
+    'Mopf;': '\U0001d544',
+    'mopf;': '\U0001d55e',
+    'mp;': '\u2213',
+    'Mscr;': '\u2133',
+    'mscr;': '\U0001d4c2',
+    'mstpos;': '\u223e',
+    'Mu;': '\u039c',
+    'mu;': '\u03bc',
+    'multimap;': '\u22b8',
+    'mumap;': '\u22b8',
+    'nabla;': '\u2207',
+    'Nacute;': '\u0143',
+    'nacute;': '\u0144',
+    'nang;': '\u2220\u20d2',
+    'nap;': '\u2249',
+    'napE;': '\u2a70\u0338',
+    'napid;': '\u224b\u0338',
+    'napos;': '\u0149',
+    'napprox;': '\u2249',
+    'natur;': '\u266e',
+    'natural;': '\u266e',
+    'naturals;': '\u2115',
+    'nbsp': '\xa0',
+    'nbsp;': '\xa0',
+    'nbump;': '\u224e\u0338',
+    'nbumpe;': '\u224f\u0338',
+    'ncap;': '\u2a43',
+    'Ncaron;': '\u0147',
+    'ncaron;': '\u0148',
+    'Ncedil;': '\u0145',
+    'ncedil;': '\u0146',
+    'ncong;': '\u2247',
+    'ncongdot;': '\u2a6d\u0338',
+    'ncup;': '\u2a42',
+    'Ncy;': '\u041d',
+    'ncy;': '\u043d',
+    'ndash;': '\u2013',
+    'ne;': '\u2260',
+    'nearhk;': '\u2924',
+    'neArr;': '\u21d7',
+    'nearr;': '\u2197',
+    'nearrow;': '\u2197',
+    'nedot;': '\u2250\u0338',
+    'NegativeMediumSpace;': '\u200b',
+    'NegativeThickSpace;': '\u200b',
+    'NegativeThinSpace;': '\u200b',
+    'NegativeVeryThinSpace;': '\u200b',
+    'nequiv;': '\u2262',
+    'nesear;': '\u2928',
+    'nesim;': '\u2242\u0338',
+    'NestedGreaterGreater;': '\u226b',
+    'NestedLessLess;': '\u226a',
+    'NewLine;': '\n',
+    'nexist;': '\u2204',
+    'nexists;': '\u2204',
+    'Nfr;': '\U0001d511',
+    'nfr;': '\U0001d52b',
+    'ngE;': '\u2267\u0338',
+    'nge;': '\u2271',
+    'ngeq;': '\u2271',
+    'ngeqq;': '\u2267\u0338',
+    'ngeqslant;': '\u2a7e\u0338',
+    'nges;': '\u2a7e\u0338',
+    'nGg;': '\u22d9\u0338',
+    'ngsim;': '\u2275',
+    'nGt;': '\u226b\u20d2',
+    'ngt;': '\u226f',
+    'ngtr;': '\u226f',
+    'nGtv;': '\u226b\u0338',
+    'nhArr;': '\u21ce',
+    'nharr;': '\u21ae',
+    'nhpar;': '\u2af2',
+    'ni;': '\u220b',
+    'nis;': '\u22fc',
+    'nisd;': '\u22fa',
+    'niv;': '\u220b',
+    'NJcy;': '\u040a',
+    'njcy;': '\u045a',
+    'nlArr;': '\u21cd',
+    'nlarr;': '\u219a',
+    'nldr;': '\u2025',
+    'nlE;': '\u2266\u0338',
+    'nle;': '\u2270',
+    'nLeftarrow;': '\u21cd',
+    'nleftarrow;': '\u219a',
+    'nLeftrightarrow;': '\u21ce',
+    'nleftrightarrow;': '\u21ae',
+    'nleq;': '\u2270',
+    'nleqq;': '\u2266\u0338',
+    'nleqslant;': '\u2a7d\u0338',
+    'nles;': '\u2a7d\u0338',
+    'nless;': '\u226e',
+    'nLl;': '\u22d8\u0338',
+    'nlsim;': '\u2274',
+    'nLt;': '\u226a\u20d2',
+    'nlt;': '\u226e',
+    'nltri;': '\u22ea',
+    'nltrie;': '\u22ec',
+    'nLtv;': '\u226a\u0338',
+    'nmid;': '\u2224',
+    'NoBreak;': '\u2060',
+    'NonBreakingSpace;': '\xa0',
+    'Nopf;': '\u2115',
+    'nopf;': '\U0001d55f',
+    'not': '\xac',
+    'Not;': '\u2aec',
+    'not;': '\xac',
+    'NotCongruent;': '\u2262',
+    'NotCupCap;': '\u226d',
+    'NotDoubleVerticalBar;': '\u2226',
+    'NotElement;': '\u2209',
+    'NotEqual;': '\u2260',
+    'NotEqualTilde;': '\u2242\u0338',
+    'NotExists;': '\u2204',
+    'NotGreater;': '\u226f',
+    'NotGreaterEqual;': '\u2271',
+    'NotGreaterFullEqual;': '\u2267\u0338',
+    'NotGreaterGreater;': '\u226b\u0338',
+    'NotGreaterLess;': '\u2279',
+    'NotGreaterSlantEqual;': '\u2a7e\u0338',
+    'NotGreaterTilde;': '\u2275',
+    'NotHumpDownHump;': '\u224e\u0338',
+    'NotHumpEqual;': '\u224f\u0338',
+    'notin;': '\u2209',
+    'notindot;': '\u22f5\u0338',
+    'notinE;': '\u22f9\u0338',
+    'notinva;': '\u2209',
+    'notinvb;': '\u22f7',
+    'notinvc;': '\u22f6',
+    'NotLeftTriangle;': '\u22ea',
+    'NotLeftTriangleBar;': '\u29cf\u0338',
+    'NotLeftTriangleEqual;': '\u22ec',
+    'NotLess;': '\u226e',
+    'NotLessEqual;': '\u2270',
+    'NotLessGreater;': '\u2278',
+    'NotLessLess;': '\u226a\u0338',
+    'NotLessSlantEqual;': '\u2a7d\u0338',
+    'NotLessTilde;': '\u2274',
+    'NotNestedGreaterGreater;': '\u2aa2\u0338',
+    'NotNestedLessLess;': '\u2aa1\u0338',
+    'notni;': '\u220c',
+    'notniva;': '\u220c',
+    'notnivb;': '\u22fe',
+    'notnivc;': '\u22fd',
+    'NotPrecedes;': '\u2280',
+    'NotPrecedesEqual;': '\u2aaf\u0338',
+    'NotPrecedesSlantEqual;': '\u22e0',
+    'NotReverseElement;': '\u220c',
+    'NotRightTriangle;': '\u22eb',
+    'NotRightTriangleBar;': '\u29d0\u0338',
+    'NotRightTriangleEqual;': '\u22ed',
+    'NotSquareSubset;': '\u228f\u0338',
+    'NotSquareSubsetEqual;': '\u22e2',
+    'NotSquareSuperset;': '\u2290\u0338',
+    'NotSquareSupersetEqual;': '\u22e3',
+    'NotSubset;': '\u2282\u20d2',
+    'NotSubsetEqual;': '\u2288',
+    'NotSucceeds;': '\u2281',
+    'NotSucceedsEqual;': '\u2ab0\u0338',
+    'NotSucceedsSlantEqual;': '\u22e1',
+    'NotSucceedsTilde;': '\u227f\u0338',
+    'NotSuperset;': '\u2283\u20d2',
+    'NotSupersetEqual;': '\u2289',
+    'NotTilde;': '\u2241',
+    'NotTildeEqual;': '\u2244',
+    'NotTildeFullEqual;': '\u2247',
+    'NotTildeTilde;': '\u2249',
+    'NotVerticalBar;': '\u2224',
+    'npar;': '\u2226',
+    'nparallel;': '\u2226',
+    'nparsl;': '\u2afd\u20e5',
+    'npart;': '\u2202\u0338',
+    'npolint;': '\u2a14',
+    'npr;': '\u2280',
+    'nprcue;': '\u22e0',
+    'npre;': '\u2aaf\u0338',
+    'nprec;': '\u2280',
+    'npreceq;': '\u2aaf\u0338',
+    'nrArr;': '\u21cf',
+    'nrarr;': '\u219b',
+    'nrarrc;': '\u2933\u0338',
+    'nrarrw;': '\u219d\u0338',
+    'nRightarrow;': '\u21cf',
+    'nrightarrow;': '\u219b',
+    'nrtri;': '\u22eb',
+    'nrtrie;': '\u22ed',
+    'nsc;': '\u2281',
+    'nsccue;': '\u22e1',
+    'nsce;': '\u2ab0\u0338',
+    'Nscr;': '\U0001d4a9',
+    'nscr;': '\U0001d4c3',
+    'nshortmid;': '\u2224',
+    'nshortparallel;': '\u2226',
+    'nsim;': '\u2241',
+    'nsime;': '\u2244',
+    'nsimeq;': '\u2244',
+    'nsmid;': '\u2224',
+    'nspar;': '\u2226',
+    'nsqsube;': '\u22e2',
+    'nsqsupe;': '\u22e3',
+    'nsub;': '\u2284',
+    'nsubE;': '\u2ac5\u0338',
+    'nsube;': '\u2288',
+    'nsubset;': '\u2282\u20d2',
+    'nsubseteq;': '\u2288',
+    'nsubseteqq;': '\u2ac5\u0338',
+    'nsucc;': '\u2281',
+    'nsucceq;': '\u2ab0\u0338',
+    'nsup;': '\u2285',
+    'nsupE;': '\u2ac6\u0338',
+    'nsupe;': '\u2289',
+    'nsupset;': '\u2283\u20d2',
+    'nsupseteq;': '\u2289',
+    'nsupseteqq;': '\u2ac6\u0338',
+    'ntgl;': '\u2279',
+    'Ntilde': '\xd1',
+    'ntilde': '\xf1',
+    'Ntilde;': '\xd1',
+    'ntilde;': '\xf1',
+    'ntlg;': '\u2278',
+    'ntriangleleft;': '\u22ea',
+    'ntrianglelefteq;': '\u22ec',
+    'ntriangleright;': '\u22eb',
+    'ntrianglerighteq;': '\u22ed',
+    'Nu;': '\u039d',
+    'nu;': '\u03bd',
+    'num;': '#',
+    'numero;': '\u2116',
+    'numsp;': '\u2007',
+    'nvap;': '\u224d\u20d2',
+    'nVDash;': '\u22af',
+    'nVdash;': '\u22ae',
+    'nvDash;': '\u22ad',
+    'nvdash;': '\u22ac',
+    'nvge;': '\u2265\u20d2',
+    'nvgt;': '>\u20d2',
+    'nvHarr;': '\u2904',
+    'nvinfin;': '\u29de',
+    'nvlArr;': '\u2902',
+    'nvle;': '\u2264\u20d2',
+    'nvlt;': '<\u20d2',
+    'nvltrie;': '\u22b4\u20d2',
+    'nvrArr;': '\u2903',
+    'nvrtrie;': '\u22b5\u20d2',
+    'nvsim;': '\u223c\u20d2',
+    'nwarhk;': '\u2923',
+    'nwArr;': '\u21d6',
+    'nwarr;': '\u2196',
+    'nwarrow;': '\u2196',
+    'nwnear;': '\u2927',
+    'Oacute': '\xd3',
+    'oacute': '\xf3',
+    'Oacute;': '\xd3',
+    'oacute;': '\xf3',
+    'oast;': '\u229b',
+    'ocir;': '\u229a',
+    'Ocirc': '\xd4',
+    'ocirc': '\xf4',
+    'Ocirc;': '\xd4',
+    'ocirc;': '\xf4',
+    'Ocy;': '\u041e',
+    'ocy;': '\u043e',
+    'odash;': '\u229d',
+    'Odblac;': '\u0150',
+    'odblac;': '\u0151',
+    'odiv;': '\u2a38',
+    'odot;': '\u2299',
+    'odsold;': '\u29bc',
+    'OElig;': '\u0152',
+    'oelig;': '\u0153',
+    'ofcir;': '\u29bf',
+    'Ofr;': '\U0001d512',
+    'ofr;': '\U0001d52c',
+    'ogon;': '\u02db',
+    'Ograve': '\xd2',
+    'ograve': '\xf2',
+    'Ograve;': '\xd2',
+    'ograve;': '\xf2',
+    'ogt;': '\u29c1',
+    'ohbar;': '\u29b5',
+    'ohm;': '\u03a9',
+    'oint;': '\u222e',
+    'olarr;': '\u21ba',
+    'olcir;': '\u29be',
+    'olcross;': '\u29bb',
+    'oline;': '\u203e',
+    'olt;': '\u29c0',
+    'Omacr;': '\u014c',
+    'omacr;': '\u014d',
+    'Omega;': '\u03a9',
+    'omega;': '\u03c9',
+    'Omicron;': '\u039f',
+    'omicron;': '\u03bf',
+    'omid;': '\u29b6',
+    'ominus;': '\u2296',
+    'Oopf;': '\U0001d546',
+    'oopf;': '\U0001d560',
+    'opar;': '\u29b7',
+    'OpenCurlyDoubleQuote;': '\u201c',
+    'OpenCurlyQuote;': '\u2018',
+    'operp;': '\u29b9',
+    'oplus;': '\u2295',
+    'Or;': '\u2a54',
+    'or;': '\u2228',
+    'orarr;': '\u21bb',
+    'ord;': '\u2a5d',
+    'order;': '\u2134',
+    'orderof;': '\u2134',
+    'ordf': '\xaa',
+    'ordf;': '\xaa',
+    'ordm': '\xba',
+    'ordm;': '\xba',
+    'origof;': '\u22b6',
+    'oror;': '\u2a56',
+    'orslope;': '\u2a57',
+    'orv;': '\u2a5b',
+    'oS;': '\u24c8',
+    'Oscr;': '\U0001d4aa',
+    'oscr;': '\u2134',
+    'Oslash': '\xd8',
+    'oslash': '\xf8',
+    'Oslash;': '\xd8',
+    'oslash;': '\xf8',
+    'osol;': '\u2298',
+    'Otilde': '\xd5',
+    'otilde': '\xf5',
+    'Otilde;': '\xd5',
+    'otilde;': '\xf5',
+    'Otimes;': '\u2a37',
+    'otimes;': '\u2297',
+    'otimesas;': '\u2a36',
+    'Ouml': '\xd6',
+    'ouml': '\xf6',
+    'Ouml;': '\xd6',
+    'ouml;': '\xf6',
+    'ovbar;': '\u233d',
+    'OverBar;': '\u203e',
+    'OverBrace;': '\u23de',
+    'OverBracket;': '\u23b4',
+    'OverParenthesis;': '\u23dc',
+    'par;': '\u2225',
+    'para': '\xb6',
+    'para;': '\xb6',
+    'parallel;': '\u2225',
+    'parsim;': '\u2af3',
+    'parsl;': '\u2afd',
+    'part;': '\u2202',
+    'PartialD;': '\u2202',
+    'Pcy;': '\u041f',
+    'pcy;': '\u043f',
+    'percnt;': '%',
+    'period;': '.',
+    'permil;': '\u2030',
+    'perp;': '\u22a5',
+    'pertenk;': '\u2031',
+    'Pfr;': '\U0001d513',
+    'pfr;': '\U0001d52d',
+    'Phi;': '\u03a6',
+    'phi;': '\u03c6',
+    'phiv;': '\u03d5',
+    'phmmat;': '\u2133',
+    'phone;': '\u260e',
+    'Pi;': '\u03a0',
+    'pi;': '\u03c0',
+    'pitchfork;': '\u22d4',
+    'piv;': '\u03d6',
+    'planck;': '\u210f',
+    'planckh;': '\u210e',
+    'plankv;': '\u210f',
+    'plus;': '+',
+    'plusacir;': '\u2a23',
+    'plusb;': '\u229e',
+    'pluscir;': '\u2a22',
+    'plusdo;': '\u2214',
+    'plusdu;': '\u2a25',
+    'pluse;': '\u2a72',
+    'PlusMinus;': '\xb1',
+    'plusmn': '\xb1',
+    'plusmn;': '\xb1',
+    'plussim;': '\u2a26',
+    'plustwo;': '\u2a27',
+    'pm;': '\xb1',
+    'Poincareplane;': '\u210c',
+    'pointint;': '\u2a15',
+    'Popf;': '\u2119',
+    'popf;': '\U0001d561',
+    'pound': '\xa3',
+    'pound;': '\xa3',
+    'Pr;': '\u2abb',
+    'pr;': '\u227a',
+    'prap;': '\u2ab7',
+    'prcue;': '\u227c',
+    'prE;': '\u2ab3',
+    'pre;': '\u2aaf',
+    'prec;': '\u227a',
+    'precapprox;': '\u2ab7',
+    'preccurlyeq;': '\u227c',
+    'Precedes;': '\u227a',
+    'PrecedesEqual;': '\u2aaf',
+    'PrecedesSlantEqual;': '\u227c',
+    'PrecedesTilde;': '\u227e',
+    'preceq;': '\u2aaf',
+    'precnapprox;': '\u2ab9',
+    'precneqq;': '\u2ab5',
+    'precnsim;': '\u22e8',
+    'precsim;': '\u227e',
+    'Prime;': '\u2033',
+    'prime;': '\u2032',
+    'primes;': '\u2119',
+    'prnap;': '\u2ab9',
+    'prnE;': '\u2ab5',
+    'prnsim;': '\u22e8',
+    'prod;': '\u220f',
+    'Product;': '\u220f',
+    'profalar;': '\u232e',
+    'profline;': '\u2312',
+    'profsurf;': '\u2313',
+    'prop;': '\u221d',
+    'Proportion;': '\u2237',
+    'Proportional;': '\u221d',
+    'propto;': '\u221d',
+    'prsim;': '\u227e',
+    'prurel;': '\u22b0',
+    'Pscr;': '\U0001d4ab',
+    'pscr;': '\U0001d4c5',
+    'Psi;': '\u03a8',
+    'psi;': '\u03c8',
+    'puncsp;': '\u2008',
+    'Qfr;': '\U0001d514',
+    'qfr;': '\U0001d52e',
+    'qint;': '\u2a0c',
+    'Qopf;': '\u211a',
+    'qopf;': '\U0001d562',
+    'qprime;': '\u2057',
+    'Qscr;': '\U0001d4ac',
+    'qscr;': '\U0001d4c6',
+    'quaternions;': '\u210d',
+    'quatint;': '\u2a16',
+    'quest;': '?',
+    'questeq;': '\u225f',
+    'QUOT': '"',
+    'quot': '"',
+    'QUOT;': '"',
+    'quot;': '"',
+    'rAarr;': '\u21db',
+    'race;': '\u223d\u0331',
+    'Racute;': '\u0154',
+    'racute;': '\u0155',
+    'radic;': '\u221a',
+    'raemptyv;': '\u29b3',
+    'Rang;': '\u27eb',
+    'rang;': '\u27e9',
+    'rangd;': '\u2992',
+    'range;': '\u29a5',
+    'rangle;': '\u27e9',
+    'raquo': '\xbb',
+    'raquo;': '\xbb',
+    'Rarr;': '\u21a0',
+    'rArr;': '\u21d2',
+    'rarr;': '\u2192',
+    'rarrap;': '\u2975',
+    'rarrb;': '\u21e5',
+    'rarrbfs;': '\u2920',
+    'rarrc;': '\u2933',
+    'rarrfs;': '\u291e',
+    'rarrhk;': '\u21aa',
+    'rarrlp;': '\u21ac',
+    'rarrpl;': '\u2945',
+    'rarrsim;': '\u2974',
+    'Rarrtl;': '\u2916',
+    'rarrtl;': '\u21a3',
+    'rarrw;': '\u219d',
+    'rAtail;': '\u291c',
+    'ratail;': '\u291a',
+    'ratio;': '\u2236',
+    'rationals;': '\u211a',
+    'RBarr;': '\u2910',
+    'rBarr;': '\u290f',
+    'rbarr;': '\u290d',
+    'rbbrk;': '\u2773',
+    'rbrace;': '}',
+    'rbrack;': ']',
+    'rbrke;': '\u298c',
+    'rbrksld;': '\u298e',
+    'rbrkslu;': '\u2990',
+    'Rcaron;': '\u0158',
+    'rcaron;': '\u0159',
+    'Rcedil;': '\u0156',
+    'rcedil;': '\u0157',
+    'rceil;': '\u2309',
+    'rcub;': '}',
+    'Rcy;': '\u0420',
+    'rcy;': '\u0440',
+    'rdca;': '\u2937',
+    'rdldhar;': '\u2969',
+    'rdquo;': '\u201d',
+    'rdquor;': '\u201d',
+    'rdsh;': '\u21b3',
+    'Re;': '\u211c',
+    'real;': '\u211c',
+    'realine;': '\u211b',
+    'realpart;': '\u211c',
+    'reals;': '\u211d',
+    'rect;': '\u25ad',
+    'REG': '\xae',
+    'reg': '\xae',
+    'REG;': '\xae',
+    'reg;': '\xae',
+    'ReverseElement;': '\u220b',
+    'ReverseEquilibrium;': '\u21cb',
+    'ReverseUpEquilibrium;': '\u296f',
+    'rfisht;': '\u297d',
+    'rfloor;': '\u230b',
+    'Rfr;': '\u211c',
+    'rfr;': '\U0001d52f',
+    'rHar;': '\u2964',
+    'rhard;': '\u21c1',
+    'rharu;': '\u21c0',
+    'rharul;': '\u296c',
+    'Rho;': '\u03a1',
+    'rho;': '\u03c1',
+    'rhov;': '\u03f1',
+    'RightAngleBracket;': '\u27e9',
+    'RightArrow;': '\u2192',
+    'Rightarrow;': '\u21d2',
+    'rightarrow;': '\u2192',
+    'RightArrowBar;': '\u21e5',
+    'RightArrowLeftArrow;': '\u21c4',
+    'rightarrowtail;': '\u21a3',
+    'RightCeiling;': '\u2309',
+    'RightDoubleBracket;': '\u27e7',
+    'RightDownTeeVector;': '\u295d',
+    'RightDownVector;': '\u21c2',
+    'RightDownVectorBar;': '\u2955',
+    'RightFloor;': '\u230b',
+    'rightharpoondown;': '\u21c1',
+    'rightharpoonup;': '\u21c0',
+    'rightleftarrows;': '\u21c4',
+    'rightleftharpoons;': '\u21cc',
+    'rightrightarrows;': '\u21c9',
+    'rightsquigarrow;': '\u219d',
+    'RightTee;': '\u22a2',
+    'RightTeeArrow;': '\u21a6',
+    'RightTeeVector;': '\u295b',
+    'rightthreetimes;': '\u22cc',
+    'RightTriangle;': '\u22b3',
+    'RightTriangleBar;': '\u29d0',
+    'RightTriangleEqual;': '\u22b5',
+    'RightUpDownVector;': '\u294f',
+    'RightUpTeeVector;': '\u295c',
+    'RightUpVector;': '\u21be',
+    'RightUpVectorBar;': '\u2954',
+    'RightVector;': '\u21c0',
+    'RightVectorBar;': '\u2953',
+    'ring;': '\u02da',
+    'risingdotseq;': '\u2253',
+    'rlarr;': '\u21c4',
+    'rlhar;': '\u21cc',
+    'rlm;': '\u200f',
+    'rmoust;': '\u23b1',
+    'rmoustache;': '\u23b1',
+    'rnmid;': '\u2aee',
+    'roang;': '\u27ed',
+    'roarr;': '\u21fe',
+    'robrk;': '\u27e7',
+    'ropar;': '\u2986',
+    'Ropf;': '\u211d',
+    'ropf;': '\U0001d563',
+    'roplus;': '\u2a2e',
+    'rotimes;': '\u2a35',
+    'RoundImplies;': '\u2970',
+    'rpar;': ')',
+    'rpargt;': '\u2994',
+    'rppolint;': '\u2a12',
+    'rrarr;': '\u21c9',
+    'Rrightarrow;': '\u21db',
+    'rsaquo;': '\u203a',
+    'Rscr;': '\u211b',
+    'rscr;': '\U0001d4c7',
+    'Rsh;': '\u21b1',
+    'rsh;': '\u21b1',
+    'rsqb;': ']',
+    'rsquo;': '\u2019',
+    'rsquor;': '\u2019',
+    'rthree;': '\u22cc',
+    'rtimes;': '\u22ca',
+    'rtri;': '\u25b9',
+    'rtrie;': '\u22b5',
+    'rtrif;': '\u25b8',
+    'rtriltri;': '\u29ce',
+    'RuleDelayed;': '\u29f4',
+    'ruluhar;': '\u2968',
+    'rx;': '\u211e',
+    'Sacute;': '\u015a',
+    'sacute;': '\u015b',
+    'sbquo;': '\u201a',
+    'Sc;': '\u2abc',
+    'sc;': '\u227b',
+    'scap;': '\u2ab8',
+    'Scaron;': '\u0160',
+    'scaron;': '\u0161',
+    'sccue;': '\u227d',
+    'scE;': '\u2ab4',
+    'sce;': '\u2ab0',
+    'Scedil;': '\u015e',
+    'scedil;': '\u015f',
+    'Scirc;': '\u015c',
+    'scirc;': '\u015d',
+    'scnap;': '\u2aba',
+    'scnE;': '\u2ab6',
+    'scnsim;': '\u22e9',
+    'scpolint;': '\u2a13',
+    'scsim;': '\u227f',
+    'Scy;': '\u0421',
+    'scy;': '\u0441',
+    'sdot;': '\u22c5',
+    'sdotb;': '\u22a1',
+    'sdote;': '\u2a66',
+    'searhk;': '\u2925',
+    'seArr;': '\u21d8',
+    'searr;': '\u2198',
+    'searrow;': '\u2198',
+    'sect': '\xa7',
+    'sect;': '\xa7',
+    'semi;': ';',
+    'seswar;': '\u2929',
+    'setminus;': '\u2216',
+    'setmn;': '\u2216',
+    'sext;': '\u2736',
+    'Sfr;': '\U0001d516',
+    'sfr;': '\U0001d530',
+    'sfrown;': '\u2322',
+    'sharp;': '\u266f',
+    'SHCHcy;': '\u0429',
+    'shchcy;': '\u0449',
+    'SHcy;': '\u0428',
+    'shcy;': '\u0448',
+    'ShortDownArrow;': '\u2193',
+    'ShortLeftArrow;': '\u2190',
+    'shortmid;': '\u2223',
+    'shortparallel;': '\u2225',
+    'ShortRightArrow;': '\u2192',
+    'ShortUpArrow;': '\u2191',
+    'shy': '\xad',
+    'shy;': '\xad',
+    'Sigma;': '\u03a3',
+    'sigma;': '\u03c3',
+    'sigmaf;': '\u03c2',
+    'sigmav;': '\u03c2',
+    'sim;': '\u223c',
+    'simdot;': '\u2a6a',
+    'sime;': '\u2243',
+    'simeq;': '\u2243',
+    'simg;': '\u2a9e',
+    'simgE;': '\u2aa0',
+    'siml;': '\u2a9d',
+    'simlE;': '\u2a9f',
+    'simne;': '\u2246',
+    'simplus;': '\u2a24',
+    'simrarr;': '\u2972',
+    'slarr;': '\u2190',
+    'SmallCircle;': '\u2218',
+    'smallsetminus;': '\u2216',
+    'smashp;': '\u2a33',
+    'smeparsl;': '\u29e4',
+    'smid;': '\u2223',
+    'smile;': '\u2323',
+    'smt;': '\u2aaa',
+    'smte;': '\u2aac',
+    'smtes;': '\u2aac\ufe00',
+    'SOFTcy;': '\u042c',
+    'softcy;': '\u044c',
+    'sol;': '/',
+    'solb;': '\u29c4',
+    'solbar;': '\u233f',
+    'Sopf;': '\U0001d54a',
+    'sopf;': '\U0001d564',
+    'spades;': '\u2660',
+    'spadesuit;': '\u2660',
+    'spar;': '\u2225',
+    'sqcap;': '\u2293',
+    'sqcaps;': '\u2293\ufe00',
+    'sqcup;': '\u2294',
+    'sqcups;': '\u2294\ufe00',
+    'Sqrt;': '\u221a',
+    'sqsub;': '\u228f',
+    'sqsube;': '\u2291',
+    'sqsubset;': '\u228f',
+    'sqsubseteq;': '\u2291',
+    'sqsup;': '\u2290',
+    'sqsupe;': '\u2292',
+    'sqsupset;': '\u2290',
+    'sqsupseteq;': '\u2292',
+    'squ;': '\u25a1',
+    'Square;': '\u25a1',
+    'square;': '\u25a1',
+    'SquareIntersection;': '\u2293',
+    'SquareSubset;': '\u228f',
+    'SquareSubsetEqual;': '\u2291',
+    'SquareSuperset;': '\u2290',
+    'SquareSupersetEqual;': '\u2292',
+    'SquareUnion;': '\u2294',
+    'squarf;': '\u25aa',
+    'squf;': '\u25aa',
+    'srarr;': '\u2192',
+    'Sscr;': '\U0001d4ae',
+    'sscr;': '\U0001d4c8',
+    'ssetmn;': '\u2216',
+    'ssmile;': '\u2323',
+    'sstarf;': '\u22c6',
+    'Star;': '\u22c6',
+    'star;': '\u2606',
+    'starf;': '\u2605',
+    'straightepsilon;': '\u03f5',
+    'straightphi;': '\u03d5',
+    'strns;': '\xaf',
+    'Sub;': '\u22d0',
+    'sub;': '\u2282',
+    'subdot;': '\u2abd',
+    'subE;': '\u2ac5',
+    'sube;': '\u2286',
+    'subedot;': '\u2ac3',
+    'submult;': '\u2ac1',
+    'subnE;': '\u2acb',
+    'subne;': '\u228a',
+    'subplus;': '\u2abf',
+    'subrarr;': '\u2979',
+    'Subset;': '\u22d0',
+    'subset;': '\u2282',
+    'subseteq;': '\u2286',
+    'subseteqq;': '\u2ac5',
+    'SubsetEqual;': '\u2286',
+    'subsetneq;': '\u228a',
+    'subsetneqq;': '\u2acb',
+    'subsim;': '\u2ac7',
+    'subsub;': '\u2ad5',
+    'subsup;': '\u2ad3',
+    'succ;': '\u227b',
+    'succapprox;': '\u2ab8',
+    'succcurlyeq;': '\u227d',
+    'Succeeds;': '\u227b',
+    'SucceedsEqual;': '\u2ab0',
+    'SucceedsSlantEqual;': '\u227d',
+    'SucceedsTilde;': '\u227f',
+    'succeq;': '\u2ab0',
+    'succnapprox;': '\u2aba',
+    'succneqq;': '\u2ab6',
+    'succnsim;': '\u22e9',
+    'succsim;': '\u227f',
+    'SuchThat;': '\u220b',
+    'Sum;': '\u2211',
+    'sum;': '\u2211',
+    'sung;': '\u266a',
+    'sup1': '\xb9',
+    'sup1;': '\xb9',
+    'sup2': '\xb2',
+    'sup2;': '\xb2',
+    'sup3': '\xb3',
+    'sup3;': '\xb3',
+    'Sup;': '\u22d1',
+    'sup;': '\u2283',
+    'supdot;': '\u2abe',
+    'supdsub;': '\u2ad8',
+    'supE;': '\u2ac6',
+    'supe;': '\u2287',
+    'supedot;': '\u2ac4',
+    'Superset;': '\u2283',
+    'SupersetEqual;': '\u2287',
+    'suphsol;': '\u27c9',
+    'suphsub;': '\u2ad7',
+    'suplarr;': '\u297b',
+    'supmult;': '\u2ac2',
+    'supnE;': '\u2acc',
+    'supne;': '\u228b',
+    'supplus;': '\u2ac0',
+    'Supset;': '\u22d1',
+    'supset;': '\u2283',
+    'supseteq;': '\u2287',
+    'supseteqq;': '\u2ac6',
+    'supsetneq;': '\u228b',
+    'supsetneqq;': '\u2acc',
+    'supsim;': '\u2ac8',
+    'supsub;': '\u2ad4',
+    'supsup;': '\u2ad6',
+    'swarhk;': '\u2926',
+    'swArr;': '\u21d9',
+    'swarr;': '\u2199',
+    'swarrow;': '\u2199',
+    'swnwar;': '\u292a',
+    'szlig': '\xdf',
+    'szlig;': '\xdf',
+    'Tab;': '\t',
+    'target;': '\u2316',
+    'Tau;': '\u03a4',
+    'tau;': '\u03c4',
+    'tbrk;': '\u23b4',
+    'Tcaron;': '\u0164',
+    'tcaron;': '\u0165',
+    'Tcedil;': '\u0162',
+    'tcedil;': '\u0163',
+    'Tcy;': '\u0422',
+    'tcy;': '\u0442',
+    'tdot;': '\u20db',
+    'telrec;': '\u2315',
+    'Tfr;': '\U0001d517',
+    'tfr;': '\U0001d531',
+    'there4;': '\u2234',
+    'Therefore;': '\u2234',
+    'therefore;': '\u2234',
+    'Theta;': '\u0398',
+    'theta;': '\u03b8',
+    'thetasym;': '\u03d1',
+    'thetav;': '\u03d1',
+    'thickapprox;': '\u2248',
+    'thicksim;': '\u223c',
+    'ThickSpace;': '\u205f\u200a',
+    'thinsp;': '\u2009',
+    'ThinSpace;': '\u2009',
+    'thkap;': '\u2248',
+    'thksim;': '\u223c',
+    'THORN': '\xde',
+    'thorn': '\xfe',
+    'THORN;': '\xde',
+    'thorn;': '\xfe',
+    'Tilde;': '\u223c',
+    'tilde;': '\u02dc',
+    'TildeEqual;': '\u2243',
+    'TildeFullEqual;': '\u2245',
+    'TildeTilde;': '\u2248',
+    'times': '\xd7',
+    'times;': '\xd7',
+    'timesb;': '\u22a0',
+    'timesbar;': '\u2a31',
+    'timesd;': '\u2a30',
+    'tint;': '\u222d',
+    'toea;': '\u2928',
+    'top;': '\u22a4',
+    'topbot;': '\u2336',
+    'topcir;': '\u2af1',
+    'Topf;': '\U0001d54b',
+    'topf;': '\U0001d565',
+    'topfork;': '\u2ada',
+    'tosa;': '\u2929',
+    'tprime;': '\u2034',
+    'TRADE;': '\u2122',
+    'trade;': '\u2122',
+    'triangle;': '\u25b5',
+    'triangledown;': '\u25bf',
+    'triangleleft;': '\u25c3',
+    'trianglelefteq;': '\u22b4',
+    'triangleq;': '\u225c',
+    'triangleright;': '\u25b9',
+    'trianglerighteq;': '\u22b5',
+    'tridot;': '\u25ec',
+    'trie;': '\u225c',
+    'triminus;': '\u2a3a',
+    'TripleDot;': '\u20db',
+    'triplus;': '\u2a39',
+    'trisb;': '\u29cd',
+    'tritime;': '\u2a3b',
+    'trpezium;': '\u23e2',
+    'Tscr;': '\U0001d4af',
+    'tscr;': '\U0001d4c9',
+    'TScy;': '\u0426',
+    'tscy;': '\u0446',
+    'TSHcy;': '\u040b',
+    'tshcy;': '\u045b',
+    'Tstrok;': '\u0166',
+    'tstrok;': '\u0167',
+    'twixt;': '\u226c',
+    'twoheadleftarrow;': '\u219e',
+    'twoheadrightarrow;': '\u21a0',
+    'Uacute': '\xda',
+    'uacute': '\xfa',
+    'Uacute;': '\xda',
+    'uacute;': '\xfa',
+    'Uarr;': '\u219f',
+    'uArr;': '\u21d1',
+    'uarr;': '\u2191',
+    'Uarrocir;': '\u2949',
+    'Ubrcy;': '\u040e',
+    'ubrcy;': '\u045e',
+    'Ubreve;': '\u016c',
+    'ubreve;': '\u016d',
+    'Ucirc': '\xdb',
+    'ucirc': '\xfb',
+    'Ucirc;': '\xdb',
+    'ucirc;': '\xfb',
+    'Ucy;': '\u0423',
+    'ucy;': '\u0443',
+    'udarr;': '\u21c5',
+    'Udblac;': '\u0170',
+    'udblac;': '\u0171',
+    'udhar;': '\u296e',
+    'ufisht;': '\u297e',
+    'Ufr;': '\U0001d518',
+    'ufr;': '\U0001d532',
+    'Ugrave': '\xd9',
+    'ugrave': '\xf9',
+    'Ugrave;': '\xd9',
+    'ugrave;': '\xf9',
+    'uHar;': '\u2963',
+    'uharl;': '\u21bf',
+    'uharr;': '\u21be',
+    'uhblk;': '\u2580',
+    'ulcorn;': '\u231c',
+    'ulcorner;': '\u231c',
+    'ulcrop;': '\u230f',
+    'ultri;': '\u25f8',
+    'Umacr;': '\u016a',
+    'umacr;': '\u016b',
+    'uml': '\xa8',
+    'uml;': '\xa8',
+    'UnderBar;': '_',
+    'UnderBrace;': '\u23df',
+    'UnderBracket;': '\u23b5',
+    'UnderParenthesis;': '\u23dd',
+    'Union;': '\u22c3',
+    'UnionPlus;': '\u228e',
+    'Uogon;': '\u0172',
+    'uogon;': '\u0173',
+    'Uopf;': '\U0001d54c',
+    'uopf;': '\U0001d566',
+    'UpArrow;': '\u2191',
+    'Uparrow;': '\u21d1',
+    'uparrow;': '\u2191',
+    'UpArrowBar;': '\u2912',
+    'UpArrowDownArrow;': '\u21c5',
+    'UpDownArrow;': '\u2195',
+    'Updownarrow;': '\u21d5',
+    'updownarrow;': '\u2195',
+    'UpEquilibrium;': '\u296e',
+    'upharpoonleft;': '\u21bf',
+    'upharpoonright;': '\u21be',
+    'uplus;': '\u228e',
+    'UpperLeftArrow;': '\u2196',
+    'UpperRightArrow;': '\u2197',
+    'Upsi;': '\u03d2',
+    'upsi;': '\u03c5',
+    'upsih;': '\u03d2',
+    'Upsilon;': '\u03a5',
+    'upsilon;': '\u03c5',
+    'UpTee;': '\u22a5',
+    'UpTeeArrow;': '\u21a5',
+    'upuparrows;': '\u21c8',
+    'urcorn;': '\u231d',
+    'urcorner;': '\u231d',
+    'urcrop;': '\u230e',
+    'Uring;': '\u016e',
+    'uring;': '\u016f',
+    'urtri;': '\u25f9',
+    'Uscr;': '\U0001d4b0',
+    'uscr;': '\U0001d4ca',
+    'utdot;': '\u22f0',
+    'Utilde;': '\u0168',
+    'utilde;': '\u0169',
+    'utri;': '\u25b5',
+    'utrif;': '\u25b4',
+    'uuarr;': '\u21c8',
+    'Uuml': '\xdc',
+    'uuml': '\xfc',
+    'Uuml;': '\xdc',
+    'uuml;': '\xfc',
+    'uwangle;': '\u29a7',
+    'vangrt;': '\u299c',
+    'varepsilon;': '\u03f5',
+    'varkappa;': '\u03f0',
+    'varnothing;': '\u2205',
+    'varphi;': '\u03d5',
+    'varpi;': '\u03d6',
+    'varpropto;': '\u221d',
+    'vArr;': '\u21d5',
+    'varr;': '\u2195',
+    'varrho;': '\u03f1',
+    'varsigma;': '\u03c2',
+    'varsubsetneq;': '\u228a\ufe00',
+    'varsubsetneqq;': '\u2acb\ufe00',
+    'varsupsetneq;': '\u228b\ufe00',
+    'varsupsetneqq;': '\u2acc\ufe00',
+    'vartheta;': '\u03d1',
+    'vartriangleleft;': '\u22b2',
+    'vartriangleright;': '\u22b3',
+    'Vbar;': '\u2aeb',
+    'vBar;': '\u2ae8',
+    'vBarv;': '\u2ae9',
+    'Vcy;': '\u0412',
+    'vcy;': '\u0432',
+    'VDash;': '\u22ab',
+    'Vdash;': '\u22a9',
+    'vDash;': '\u22a8',
+    'vdash;': '\u22a2',
+    'Vdashl;': '\u2ae6',
+    'Vee;': '\u22c1',
+    'vee;': '\u2228',
+    'veebar;': '\u22bb',
+    'veeeq;': '\u225a',
+    'vellip;': '\u22ee',
+    'Verbar;': '\u2016',
+    'verbar;': '|',
+    'Vert;': '\u2016',
+    'vert;': '|',
+    'VerticalBar;': '\u2223',
+    'VerticalLine;': '|',
+    'VerticalSeparator;': '\u2758',
+    'VerticalTilde;': '\u2240',
+    'VeryThinSpace;': '\u200a',
+    'Vfr;': '\U0001d519',
+    'vfr;': '\U0001d533',
+    'vltri;': '\u22b2',
+    'vnsub;': '\u2282\u20d2',
+    'vnsup;': '\u2283\u20d2',
+    'Vopf;': '\U0001d54d',
+    'vopf;': '\U0001d567',
+    'vprop;': '\u221d',
+    'vrtri;': '\u22b3',
+    'Vscr;': '\U0001d4b1',
+    'vscr;': '\U0001d4cb',
+    'vsubnE;': '\u2acb\ufe00',
+    'vsubne;': '\u228a\ufe00',
+    'vsupnE;': '\u2acc\ufe00',
+    'vsupne;': '\u228b\ufe00',
+    'Vvdash;': '\u22aa',
+    'vzigzag;': '\u299a',
+    'Wcirc;': '\u0174',
+    'wcirc;': '\u0175',
+    'wedbar;': '\u2a5f',
+    'Wedge;': '\u22c0',
+    'wedge;': '\u2227',
+    'wedgeq;': '\u2259',
+    'weierp;': '\u2118',
+    'Wfr;': '\U0001d51a',
+    'wfr;': '\U0001d534',
+    'Wopf;': '\U0001d54e',
+    'wopf;': '\U0001d568',
+    'wp;': '\u2118',
+    'wr;': '\u2240',
+    'wreath;': '\u2240',
+    'Wscr;': '\U0001d4b2',
+    'wscr;': '\U0001d4cc',
+    'xcap;': '\u22c2',
+    'xcirc;': '\u25ef',
+    'xcup;': '\u22c3',
+    'xdtri;': '\u25bd',
+    'Xfr;': '\U0001d51b',
+    'xfr;': '\U0001d535',
+    'xhArr;': '\u27fa',
+    'xharr;': '\u27f7',
+    'Xi;': '\u039e',
+    'xi;': '\u03be',
+    'xlArr;': '\u27f8',
+    'xlarr;': '\u27f5',
+    'xmap;': '\u27fc',
+    'xnis;': '\u22fb',
+    'xodot;': '\u2a00',
+    'Xopf;': '\U0001d54f',
+    'xopf;': '\U0001d569',
+    'xoplus;': '\u2a01',
+    'xotime;': '\u2a02',
+    'xrArr;': '\u27f9',
+    'xrarr;': '\u27f6',
+    'Xscr;': '\U0001d4b3',
+    'xscr;': '\U0001d4cd',
+    'xsqcup;': '\u2a06',
+    'xuplus;': '\u2a04',
+    'xutri;': '\u25b3',
+    'xvee;': '\u22c1',
+    'xwedge;': '\u22c0',
+    'Yacute': '\xdd',
+    'yacute': '\xfd',
+    'Yacute;': '\xdd',
+    'yacute;': '\xfd',
+    'YAcy;': '\u042f',
+    'yacy;': '\u044f',
+    'Ycirc;': '\u0176',
+    'ycirc;': '\u0177',
+    'Ycy;': '\u042b',
+    'ycy;': '\u044b',
+    'yen': '\xa5',
+    'yen;': '\xa5',
+    'Yfr;': '\U0001d51c',
+    'yfr;': '\U0001d536',
+    'YIcy;': '\u0407',
+    'yicy;': '\u0457',
+    'Yopf;': '\U0001d550',
+    'yopf;': '\U0001d56a',
+    'Yscr;': '\U0001d4b4',
+    'yscr;': '\U0001d4ce',
+    'YUcy;': '\u042e',
+    'yucy;': '\u044e',
+    'yuml': '\xff',
+    'Yuml;': '\u0178',
+    'yuml;': '\xff',
+    'Zacute;': '\u0179',
+    'zacute;': '\u017a',
+    'Zcaron;': '\u017d',
+    'zcaron;': '\u017e',
+    'Zcy;': '\u0417',
+    'zcy;': '\u0437',
+    'Zdot;': '\u017b',
+    'zdot;': '\u017c',
+    'zeetrf;': '\u2128',
+    'ZeroWidthSpace;': '\u200b',
+    'Zeta;': '\u0396',
+    'zeta;': '\u03b6',
+    'Zfr;': '\u2128',
+    'zfr;': '\U0001d537',
+    'ZHcy;': '\u0416',
+    'zhcy;': '\u0436',
+    'zigrarr;': '\u21dd',
+    'Zopf;': '\u2124',
+    'zopf;': '\U0001d56b',
+    'Zscr;': '\U0001d4b5',
+    'zscr;': '\U0001d4cf',
+    'zwj;': '\u200d',
+    'zwnj;': '\u200c',
+}
+
+
 class EntitySubstitution(object):
     """The ability to substitute XML or HTML entities for certain characters."""
 
     def _populate_class_variables():
-        lookup = {}
-        reverse_lookup = {}
-        characters_for_re = []
-
-        # &apos is an XHTML entity and an HTML 5, but not an HTML 4
-        # entity. We don't want to use it, but we want to recognize it on the way in.
-        #
-        # TODO: Ideally we would be able to recognize all HTML 5 named
-        # entities, but that's a little tricky.
-        extra = [(39, 'apos')]
-        for codepoint, name in list(codepoint2name.items()) + extra:
-            character = unichr(codepoint)
-            if codepoint not in (34, 39):
-                # There's no point in turning the quotation mark into
-                # &quot; or the single quote into &apos;, unless it
-                # happens within an attribute value, which is handled
-                # elsewhere.
-                characters_for_re.append(character)
-                lookup[character] = name
-            # But we do want to recognize those entities on the way in and
-            # convert them to Unicode characters.
-            reverse_lookup[name] = character
-        re_definition = "[%s]" % "".join(characters_for_re)
-        return lookup, reverse_lookup, re.compile(re_definition)
+        """Initialize variables used by this class to manage the plethora of
+        HTML5 named entities.
+
+        This function returns a 3-tuple containing two dictionaries
+        and a regular expression:
+
+        unicode_to_name: A mapping of Unicode strings like "⦨" to
+        entity names like "angmsdaa". When a single Unicode string has
+        multiple entity names, we try to choose the most commonly-used
+        name.
+
+        name_to_unicode: A mapping of entity names like "angmsdaa" to 
+        Unicode strings like "⦨".
+
+        named_entity_re: A regular expression matching (almost) any
+        Unicode string that corresponds to an HTML5 named entity.
+        """
+        unicode_to_name = {}
+        name_to_unicode = {}
+
+        short_entities = set()
+        long_entities_by_first_character = defaultdict(set)
+        
+        for name_with_semicolon, character in sorted(html5.items()):
+            # "It is intentional, for legacy compatibility, that many
+            # code points have multiple character reference names. For
+            # example, some appear both with and without the trailing
+            # semicolon, or with different capitalizations."
+            # - https://html.spec.whatwg.org/multipage/named-characters.html#named-character-references
+            #
+            # The parsers are in charge of handling (or not) character
+            # references with no trailing semicolon, so we remove the
+            # semicolon whenever it appears.
+            if name_with_semicolon.endswith(';'):
+                name = name_with_semicolon[:-1]
+            else:
+                name = name_with_semicolon
+
+            # When parsing HTML, we want to recognize any known named
+            # entity and convert it to a sequence of Unicode
+            # characters.
+            if name not in name_to_unicode:
+                name_to_unicode[name] = character
+
+            # When _generating_ HTML, we want to recognize special
+            # character sequences that _could_ be converted to named
+            # entities.
+            unicode_to_name[character] = name
+
+            # We also need to build a regular expression that lets us
+            # _find_ those characters in output strings so we can
+            # replace them.
+            #
+            # This is tricky, for two reasons.
+
+            if (len(character) == 1 and ord(character) < 128
+                and character not in '<>&'):
+                # First, it would be annoying to turn single ASCII
+                # characters like | into named entities like
+                # &verbar;. The exceptions are <>&, which we _must_
+                # turn into named entities to produce valid HTML.
+                continue
+
+            if len(character) > 1 and all(ord(x) < 128 for x in character):
+                # We also do not want to turn _combinations_ of ASCII
+                # characters like 'fj' into named entities like '&fjlig;',
+                # though that's more debatable.
+                continue
+
+            # Second, some named entities have a Unicode value that's
+            # a subset of the Unicode value for some _other_ named
+            # entity.  As an example, '\u2267' is &GreaterFullEqual;,
+            # but '\u2267\u0338' is &NotGreaterFullEqual;. Our regular
+            # expression needs to match the first two characters of
+            # "\u2267\u0338foo", but only the first character of
+            # "\u2267foo".
+            #
+            # In this step, we build two sets of characters that
+            # _eventually_ need to go into the regular expression. But
+            # we won't know exactly what the regular expression needs
+            # to look like until we've gone through the entire list of
+            # named entities.
+            if len(character) == 1:
+                short_entities.add(character)
+            else:
+                long_entities_by_first_character[character[0]].add(character)
+
+        # Now that we've been through the entire list of entities, we
+        # can create a regular expression that matches any of them.
+        particles = set()
+        for short in short_entities:
+            long_versions = long_entities_by_first_character[short]
+            if not long_versions:
+                particles.add(short)
+            else:
+                ignore = "".join([x[1] for x in long_versions])
+                # This finds, e.g. \u2267 but only if it is _not_
+                # followed by \u0338.
+                particles.add("%s(?![%s])" % (short, ignore))
+        
+        for long_entities in list(long_entities_by_first_character.values()):
+            for long_entity in long_entities:
+                particles.add(long_entity)
+
+        re_definition = "(%s)" % "|".join(particles)
+                
+        # If an entity shows up in both html5 and codepoint2name, it's
+        # likely that HTML5 gives it several different names, such as
+        # 'rsquo' and 'rsquor'. When converting Unicode characters to
+        # named entities, the codepoint2name name should take
+        # precedence where possible, since that's the more easily
+        # recognizable one.
+        for codepoint, name in list(codepoint2name.items()):
+            character = chr(codepoint)
+            unicode_to_name[character] = name
+
+        return unicode_to_name, name_to_unicode, re.compile(re_definition)
     (CHARACTER_TO_HTML_ENTITY, HTML_ENTITY_TO_CHARACTER,
      CHARACTER_TO_HTML_ENTITY_RE) = _populate_class_variables()
 
@@ -113,14 +2448,14 @@
     @classmethod
     def _substitute_html_entity(cls, matchobj):
         """Used with a regular expression to substitute the
-        appropriate HTML entity for a special character."""
+        appropriate HTML entity for a special character string."""
         entity = cls.CHARACTER_TO_HTML_ENTITY.get(matchobj.group(0))
         return "&%s;" % entity
 
     @classmethod
     def _substitute_xml_entity(cls, matchobj):
         """Used with a regular expression to substitute the
-        appropriate XML entity for a special character."""
+        appropriate XML entity for a special character string."""
         entity = cls.CHARACTER_TO_XML_ENTITY[matchobj.group(0)]
         return "&%s;" % entity
 
@@ -228,32 +2563,65 @@
     Order of precedence:
 
     1. Encodings you specifically tell EncodingDetector to try first
-    (the override_encodings argument to the constructor).
+    (the known_definite_encodings argument to the constructor).
+
+    2. An encoding determined by sniffing the document's byte-order mark.
 
-    2. An encoding declared within the bytestring itself, either in an
+    3. Encodings you specifically tell EncodingDetector to try if
+    byte-order mark sniffing fails (the user_encodings argument to the
+    constructor).
+
+    4. An encoding declared within the bytestring itself, either in an
     XML declaration (if the bytestring is to be interpreted as an XML
     document), or in a <meta> tag (if the bytestring is to be
     interpreted as an HTML document.)
 
-    3. An encoding detected through textual analysis by chardet,
+    5. An encoding detected through textual analysis by chardet,
     cchardet, or a similar external library.
 
-    4. UTF-8.
+    6. UTF-8.
 
-    5. Windows-1252.
+    7. Windows-1252.
+
     """
-    def __init__(self, markup, override_encodings=None, is_html=False,
-                 exclude_encodings=None):
+    def __init__(self, markup, known_definite_encodings=None,
+                 is_html=False, exclude_encodings=None,
+                 user_encodings=None, override_encodings=None):
         """Constructor.
 
         :param markup: Some markup in an unknown encoding.
-        :param override_encodings: These encodings will be tried first.
-        :param is_html: If True, this markup is considered to be HTML. Otherwise
-            it's assumed to be XML.
-        :param exclude_encodings: These encodings will not be tried, even
-            if they otherwise would be.
+
+        :param known_definite_encodings: When determining the encoding
+            of `markup`, these encodings will be tried first, in
+            order. In HTML terms, this corresponds to the "known
+            definite encoding" step defined here:
+            https://html.spec.whatwg.org/multipage/parsing.html#parsing-with-a-known-character-encoding
+
+        :param user_encodings: These encodings will be tried after the
+            `known_definite_encodings` have been tried and failed, and
+            after an attempt to sniff the encoding by looking at a
+            byte order mark has failed. In HTML terms, this
+            corresponds to the step "user has explicitly instructed
+            the user agent to override the document's character
+            encoding", defined here:
+            https://html.spec.whatwg.org/multipage/parsing.html#determining-the-character-encoding
+
+        :param override_encodings: A deprecated alias for
+            known_definite_encodings. Any encodings here will be tried
+            immediately after the encodings in
+            known_definite_encodings.
+
+        :param is_html: If True, this markup is considered to be
+            HTML. Otherwise it's assumed to be XML.
+
+        :param exclude_encodings: These encodings will not be tried,
+            even if they otherwise would be.
+
         """
-        self.override_encodings = override_encodings or []
+        self.known_definite_encodings = list(known_definite_encodings or [])
+        if override_encodings:
+            self.known_definite_encodings += override_encodings
+        self.user_encodings = user_encodings or []
         exclude_encodings = exclude_encodings or []
         self.exclude_encodings = set([x.lower() for x in exclude_encodings])
         self.chardet_encoding = None
@@ -286,7 +2654,9 @@
         :yield: A sequence of strings.
         """
         tried = set()
-        for e in self.override_encodings:
+
+        # First, try the known definite encodings
+        for e in self.known_definite_encodings:
             if self._usable(e, tried):
                 yield e
 
@@ -295,6 +2665,12 @@
         if self._usable(self.sniffed_encoding, tried):
             yield self.sniffed_encoding
 
+        # Sniffing the byte-order mark did nothing; try the user
+        # encodings.
+        for e in self.user_encodings:
+            if self._usable(e, tried):
+                yield e
+            
         # Look within the document for an XML or HTML encoding
         # declaration.
         if self.declared_encoding is None:
@@ -323,7 +2699,7 @@
         :return: A 2-tuple (modified data, implied encoding)
         """
         encoding = None
-        if isinstance(data, unicode):
+        if isinstance(data, str):
             # Unicode data cannot have a byte-order mark.
             return data, encoding
         if (len(data) >= 4) and (data[:2] == b'\xfe\xff') \
@@ -370,7 +2746,7 @@
         if isinstance(markup, bytes):
             res = encoding_res[bytes]
         else:
-            res = encoding_res[unicode]
+            res = encoding_res[str]
 
         xml_re = res['xml']
         html_re = res['html']
@@ -405,13 +2781,33 @@
         "iso-8859-2",
         ]
 
-    def __init__(self, markup, override_encodings=[],
-                 smart_quotes_to=None, is_html=False, exclude_encodings=[]):
+    def __init__(self, markup, known_definite_encodings=[],
+                 smart_quotes_to=None, is_html=False, exclude_encodings=[],
+                 user_encodings=None, override_encodings=None
+    ):
         """Constructor.
 
         :param markup: A bytestring representing markup in an unknown encoding.
-        :param override_encodings: These encodings will be tried first,
-           before any sniffing code is run.
+
+        :param known_definite_encodings: When determining the encoding
+            of `markup`, these encodings will be tried first, in
+            order. In HTML terms, this corresponds to the "known
+            definite encoding" step defined here:
+            https://html.spec.whatwg.org/multipage/parsing.html#parsing-with-a-known-character-encoding
+
+        :param user_encodings: These encodings will be tried after the
+            `known_definite_encodings` have been tried and failed, and
+            after an attempt to sniff the encoding by looking at a
+            byte order mark has failed. In HTML terms, this
+            corresponds to the step "user has explicitly instructed
+            the user agent to override the document's character
+            encoding", defined here:
+            https://html.spec.whatwg.org/multipage/parsing.html#determining-the-character-encoding
+
+        :param override_encodings: A deprecated alias for
+            known_definite_encodings. Any encodings here will be tried
+            immediately after the encodings in
+            known_definite_encodings.
 
         :param smart_quotes_to: By default, Microsoft smart quotes will, like all other characters, be converted
            to Unicode characters. Setting this to 'ascii' will convert them to ASCII quotes instead.
@@ -421,6 +2817,7 @@
             it's assumed to be XML.
         :param exclude_encodings: These encodings will not be considered, even
             if the sniffing code thinks they might make sense.
+
         """
         self.smart_quotes_to = smart_quotes_to
         self.tried_encodings = []
@@ -428,12 +2825,14 @@
         self.is_html = is_html
         self.log = logging.getLogger(__name__)
         self.detector = EncodingDetector(
-            markup, override_encodings, is_html, exclude_encodings)
+            markup, known_definite_encodings, is_html, exclude_encodings,
+            user_encodings, override_encodings
+        )
 
         # Short-circuit if the data is in Unicode to begin with.
-        if isinstance(markup, unicode) or markup == '':
+        if isinstance(markup, str) or markup == '':
             self.markup = markup
-            self.unicode_markup = unicode(markup)
+            self.unicode_markup = str(markup)
             self.original_encoding = None
             return
 
@@ -523,7 +2922,7 @@
 
         :param encoding: The name of an encoding.
         """
-        return unicode(data, encoding, errors)
+        return str(data, encoding, errors)
 
     @property
     def declared_html_encoding(self):
diff -Nru beautifulsoup4-4.9.3/bs4/diagnose.py beautifulsoup4-4.10.0/bs4/diagnose.py
--- beautifulsoup4-4.9.3/bs4/diagnose.py	2020-05-17 17:55:43.000000000 +0000
+++ beautifulsoup4-4.10.0/bs4/diagnose.py	2021-09-07 23:36:46.000000000 +0000
@@ -4,8 +4,8 @@
 __license__ = "MIT"
 
 import cProfile
-from StringIO import StringIO
-from HTMLParser import HTMLParser
+from io import StringIO
+from html.parser import HTMLParser
 import bs4
 from bs4 import BeautifulSoup, __version__
 from bs4.builder import builder_registry
@@ -25,8 +25,8 @@
     :param data: A string containing markup that needs to be explained.
     :return: None; diagnostics are printed to standard output.
     """
-    print("Diagnostic running on Beautiful Soup %s" % __version__)
-    print("Python version %s" % sys.version)
+    print(("Diagnostic running on Beautiful Soup %s" % __version__))
+    print(("Python version %s" % sys.version))
 
     basic_parsers = ["html.parser", "html5lib", "lxml"]
     for name in basic_parsers:
@@ -35,16 +35,16 @@
                 break
         else:
             basic_parsers.remove(name)
-            print(
+            print((
                 "I noticed that %s is not installed. Installing it may help." %
-                name)
+                name))
 
     if 'lxml' in basic_parsers:
         basic_parsers.append("lxml-xml")
         try:
             from lxml import etree
-            print("Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION)))
-        except ImportError, e:
+            print(("Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION))))
+        except ImportError as e:
             print(
                 "lxml is not installed or couldn't be imported.")
 
@@ -52,21 +52,21 @@
     if 'html5lib' in basic_parsers:
         try:
             import html5lib
-            print("Found html5lib version %s" % html5lib.__version__)
-        except ImportError, e:
+            print(("Found html5lib version %s" % html5lib.__version__))
+        except ImportError as e:
             print(
                 "html5lib is not installed or couldn't be imported.")
 
     if hasattr(data, 'read'):
         data = data.read()
     elif data.startswith("http:") or data.startswith("https:"):
-        print('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data)
+        print(('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data))
         print("You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.")
         return
     else:
         try:
             if os.path.exists(data):
-                print('"%s" looks like a filename. Reading data from the file.' % data)
+                print(('"%s" looks like a filename. Reading data from the file.' % data))
                 with open(data) as fp:
                     data = fp.read()
         except ValueError:
@@ -76,19 +76,19 @@
         print("")
 
     for parser in basic_parsers:
-        print("Trying to parse your markup with %s" % parser)
+        print(("Trying to parse your markup with %s" % parser))
         success = False
         try:
             soup = BeautifulSoup(data, features=parser)
             success = True
-        except Exception, e:
-            print("%s could not parse the markup." % parser)
+        except Exception as e:
+            print(("%s could not parse the markup." % parser))
             traceback.print_exc()
         if success:
-            print("Here's what %s did with the markup:" % parser)
-            print(soup.prettify())
+            print(("Here's what %s did with the markup:" % parser))
+            print((soup.prettify()))
 
-        print("-" * 80)
+        print(("-" * 80))
 
 def lxml_trace(data, html=True, **kwargs):
     """Print out the lxml events that occur during parsing.
@@ -104,7 +104,7 @@
     """
     from lxml import etree
     for event, element in etree.iterparse(StringIO(data), html=html, **kwargs):
-        print("%s, %4s, %s" % (event, element.tag, element.text))
+        print(("%s, %4s, %s" % (event, element.tag, element.text)))
 
 class AnnouncingParser(HTMLParser):
     """Subclass of HTMLParser that announces parse events, without doing
@@ -193,9 +193,9 @@
 
 def benchmark_parsers(num_elements=100000):
     """Very basic head-to-head performance benchmark."""
-    print("Comparative parser benchmark on Beautiful Soup %s" % __version__)
+    print(("Comparative parser benchmark on Beautiful Soup %s" % __version__))
     data = rdoc(num_elements)
-    print("Generated a large invalid HTML document (%d bytes)." % len(data))
+    print(("Generated a large invalid HTML document (%d bytes)." % len(data)))
     
     for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]:
         success = False
@@ -204,24 +204,24 @@
             soup = BeautifulSoup(data, parser)
             b = time.time()
             success = True
-        except Exception, e:
-            print("%s could not parse the markup." % parser)
+        except Exception as e:
+            print(("%s could not parse the markup." % parser))
             traceback.print_exc()
         if success:
-            print("BS4+%s parsed the markup in %.2fs." % (parser, b-a))
+            print(("BS4+%s parsed the markup in %.2fs." % (parser, b-a)))
 
     from lxml import etree
     a = time.time()
     etree.HTML(data)
     b = time.time()
-    print("Raw lxml parsed the markup in %.2fs." % (b-a))
+    print(("Raw lxml parsed the markup in %.2fs." % (b-a)))
 
     import html5lib
     parser = html5lib.HTMLParser()
     a = time.time()
     parser.parse(data)
     b = time.time()
-    print("Raw html5lib parsed the markup in %.2fs." % (b-a))
+    print(("Raw html5lib parsed the markup in %.2fs." % (b-a)))
 
 def profile(num_elements=100000, parser="lxml"):
     """Use Python's profiler on a randomly generated document."""
diff -Nru beautifulsoup4-4.9.3/bs4/element.py beautifulsoup4-4.10.0/bs4/element.py
--- beautifulsoup4-4.9.3/bs4/element.py	2020-10-02 22:19:12.000000000 +0000
+++ beautifulsoup4-4.10.0/bs4/element.py	2021-09-07 23:36:46.000000000 +0000
@@ -3,14 +3,14 @@
 
 try:
     from collections.abc import Callable # Python 3.6
-except ImportError , e:
+except ImportError as e:
     from collections import Callable
 import re
 import sys
 import warnings
 try:
     import soupsieve
-except ImportError, e:
+except ImportError as e:
     soupsieve = None
     warnings.warn(
         'The soupsieve package is not installed. CSS selectors cannot be used.'
@@ -57,22 +57,22 @@
 # Source:
 # https://docs.python.org/3/library/codecs.html#python-specific-encodings
 PYTHON_SPECIFIC_ENCODINGS = set([
-    u"idna",
-    u"mbcs",
-    u"oem",
-    u"palmos",
-    u"punycode",
-    u"raw_unicode_escape",
-    u"undefined",
-    u"unicode_escape",
-    u"raw-unicode-escape",
-    u"unicode-escape",
-    u"string-escape",
-    u"string_escape",
+    "idna",
+    "mbcs",
+    "oem",
+    "palmos",
+    "punycode",
+    "raw_unicode_escape",
+    "undefined",
+    "unicode_escape",
+    "raw-unicode-escape",
+    "unicode-escape",
+    "string-escape",
+    "string_escape",
 ])
     
 
-class NamespacedAttribute(unicode):
+class NamespacedAttribute(str):
     """A namespaced string (e.g. 'xml:lang') that remembers the namespace
     ('xml') and the name ('lang') that were used to create it.
     """
@@ -83,19 +83,19 @@
             # per https://www.w3.org/TR/xml-names/#defaulting
             name = None
 
-        if name is None:
-            obj = unicode.__new__(cls, prefix)
-        elif prefix is None:
+        if not name:
+            obj = str.__new__(cls, prefix)
+        elif not prefix:
             # Not really namespaced.
-            obj = unicode.__new__(cls, name)
+            obj = str.__new__(cls, name)
         else:
-            obj = unicode.__new__(cls, prefix + ":" + name)
+            obj = str.__new__(cls, prefix + ":" + name)
         obj.prefix = prefix
         obj.name = name
         obj.namespace = namespace
         return obj
 
-class AttributeValueWithCharsetSubstitution(unicode):
+class AttributeValueWithCharsetSubstitution(str):
     """A stand-in object for a character encoding specified in HTML."""
 
 class CharsetMetaAttributeValue(AttributeValueWithCharsetSubstitution):
@@ -106,7 +106,7 @@
     """
 
     def __new__(cls, original_value):
-        obj = unicode.__new__(cls, original_value)
+        obj = str.__new__(cls, original_value)
         obj.original_value = original_value
         return obj
 
@@ -134,9 +134,9 @@
         match = cls.CHARSET_RE.search(original_value)
         if match is None:
             # No substitution necessary.
-            return unicode.__new__(unicode, original_value)
+            return str.__new__(str, original_value)
 
-        obj = unicode.__new__(cls, original_value)
+        obj = str.__new__(cls, original_value)
         obj.original_value = original_value
         return obj
 
@@ -255,25 +255,67 @@
     nextSibling = _alias("next_sibling")  # BS3
     previousSibling = _alias("previous_sibling")  # BS3
 
-    def replace_with(self, replace_with):
-        """Replace this PageElement with another one, keeping the rest of the
-        tree the same.
+    default = object()
+    def _all_strings(self, strip=False, types=default):
+        """Yield all strings of certain classes, possibly stripping them.
+        
+        This is implemented differently in Tag and NavigableString.
+        """
+        raise NotImplementedError()
+   
+    @property
+    def stripped_strings(self):
+        """Yield all strings in this PageElement, stripping them first.
+
+        :yield: A sequence of stripped strings.
+        """
+        for string in self._all_strings(True):
+            yield string
+
+    def get_text(self, separator="", strip=False,
+                 types=default):
+        """Get all child strings of this PageElement, concatenated using the
+        given separator.
+
+        :param separator: Strings will be concatenated using this separator.
+
+        :param strip: If True, strings will be stripped before being
+            concatenated.
+
+        :param types: A tuple of NavigableString subclasses. Any
+            strings of a subclass not found in this list will be
+            ignored. Although there are exceptions, the default
+            behavior in most cases is to consider only NavigableString
+            and CData objects. That means no comments, processing
+            instructions, etc.
+
+        :return: A string.
+        """
+        return separator.join([s for s in self._all_strings(
+                    strip, types=types)])
+    getText = get_text
+    text = property(get_text)
+    
+    def replace_with(self, *args):
+        """Replace this PageElement with one or more PageElements, keeping the 
+        rest of the tree the same.
         
-        :param replace_with: A PageElement.
+        :param args: One or more PageElements.
         :return: `self`, no longer part of the tree.
         """
         if self.parent is None:
             raise ValueError(
                 "Cannot replace one element with another when the "
                 "element to be replaced is not part of a tree.")
-        if replace_with is self:
+        if len(args) == 1 and args[0] is self:
             return
-        if replace_with is self.parent:
+        if any(x is self.parent for x in args):
             raise ValueError("Cannot replace a Tag with its parent.")
         old_parent = self.parent
         my_index = self.parent.index(self)
         self.extract(_self_index=my_index)
-        old_parent.insert(my_index, replace_with)
+        for idx, replace_with in enumerate(args, start=my_index):
+            old_parent.insert(idx, replace_with)
         return self
     replaceWith = replace_with  # BS3
 
@@ -376,7 +418,7 @@
             raise ValueError("Cannot insert None into a tag.")
         if new_child is self:
             raise ValueError("Cannot insert a tag into itself.")
-        if (isinstance(new_child, basestring)
+        if (isinstance(new_child, str)
             and not isinstance(new_child, NavigableString)):
             new_child = NavigableString(new_child)
 
@@ -753,7 +795,7 @@
                 result = (element for element in generator
                           if isinstance(element, Tag))
                 return ResultSet(strainer, result)
-            elif isinstance(name, basestring):
+            elif isinstance(name, str):
                 # Optimization to find all tags with a given name.
                 if name.count(':') == 1:
                     # This is a name with a prefix. If this is a namespace-aware document,
@@ -872,7 +914,7 @@
         return self.parents
 
 
-class NavigableString(unicode, PageElement):
+class NavigableString(str, PageElement):
     """A Python Unicode string that is part of a parse tree.
 
     When Beautiful Soup parses the markup <b>penguin</b>, it will
@@ -895,10 +937,10 @@
         passed in to the superclass's __new__ or the superclass won't know
         how to handle non-ASCII characters.
         """
-        if isinstance(value, unicode):
-            u = unicode.__new__(cls, value)
+        if isinstance(value, str):
+            u = str.__new__(cls, value)
         else:
-            u = unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)
+            u = str.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)
         u.setup()
         return u
 
@@ -909,7 +951,7 @@
         return type(self)(self)
 
     def __getnewargs__(self):
-        return (unicode(self),)
+        return (str(self),)
 
     def __getattr__(self, attr):
         """text.string gives you text. This is for backwards
@@ -945,7 +987,49 @@
         """Prevent NavigableString.name from ever being set."""
         raise AttributeError("A NavigableString cannot be given a name.")
 
-    
+    def _all_strings(self, strip=False, types=PageElement.default):
+        """Yield all strings of certain classes, possibly stripping them.
+
+        This makes it easy for NavigableString to implement methods
+        like get_text() as conveniences, creating a consistent
+        text-extraction API across all PageElements.
+
+        :param strip: If True, all strings will be stripped before being
+            yielded.
+
+        :param types: A tuple of NavigableString subclasses. If this
+            NavigableString isn't one of those subclasses, the
+            sequence will be empty. By default, the subclasses
+            considered are NavigableString and CData objects. That
+            means no comments, processing instructions, etc.
+
+        :yield: A sequence that either contains this string, or is empty.
+
+        """
+        if types is self.default:
+            # This is kept in Tag because it's full of subclasses of
+            # this class, which aren't defined until later in the file.
+            types = Tag.DEFAULT_INTERESTING_STRING_TYPES
+
+        # Do nothing if the caller is looking for specific types of
+        # string, and we're of a different type.
+        my_type = type(self)
+        if types is not None:
+            if isinstance(types, type):
+                # Looking for a single type.
+                if my_type is not types:
+                    return
+            elif my_type not in types:
+                # Looking for one of a list of types.
+                return
+
+        value = self
+        if strip:
+            value = value.strip()
+        if len(value) > 0:
+            yield value
+    strings = property(_all_strings)
+
 class PreformattedString(NavigableString):
     """A NavigableString not subject to the normal formatting rules.
 
@@ -975,30 +1059,30 @@
 
 class CData(PreformattedString):
     """A CDATA block."""
-    PREFIX = u'<![CDATA['
-    SUFFIX = u']]>'
+    PREFIX = '<![CDATA['
+    SUFFIX = ']]>'
 
 class ProcessingInstruction(PreformattedString):
     """A SGML processing instruction."""
 
-    PREFIX = u'<?'
-    SUFFIX = u'>'
+    PREFIX = '<?'
+    SUFFIX = '>'
 
 class XMLProcessingInstruction(ProcessingInstruction):
     """An XML processing instruction."""
-    PREFIX = u'<?'
-    SUFFIX = u'?>'
+    PREFIX = '<?'
+    SUFFIX = '?>'
 
 class Comment(PreformattedString):
     """An HTML or XML comment."""
-    PREFIX = u'<!--'
-    SUFFIX = u'-->'
+    PREFIX = '<!--'
+    SUFFIX = '-->'
 
 
 class Declaration(PreformattedString):
     """An XML declaration."""
-    PREFIX = u'<?'
-    SUFFIX = u'?>'
+    PREFIX = '<?'
+    SUFFIX = '?>'
 
 
 class Doctype(PreformattedString):
@@ -1026,8 +1110,8 @@
 
         return Doctype(value)
 
-    PREFIX = u'<!DOCTYPE '
-    SUFFIX = u'>\n'
+    PREFIX = '<!DOCTYPE '
+    SUFFIX = '>\n'
 
 
 class Stylesheet(NavigableString):
@@ -1069,7 +1153,8 @@
                  prefix=None, attrs=None, parent=None, previous=None,
                  is_xml=None, sourceline=None, sourcepos=None,
                  can_be_empty_element=None, cdata_list_attributes=None,
-                 preserve_whitespace_tags=None
+                 preserve_whitespace_tags=None,
+                 interesting_string_types=None,
     ):
         """Basic constructor.
 
@@ -1095,6 +1180,13 @@
             be treated as CDATA if they ever show up on this tag.
         :param preserve_whitespace_tags: A list of tag names whose contents
             should have their whitespace preserved.
+        :param interesting_string_types: This is a NavigableString
+            subclass or a tuple of them. When iterating over this
+            Tag's strings in methods like Tag.strings or Tag.get_text,
+            these are the types of strings that are interesting enough
+            to be considered. The default is to consider
+            NavigableString and CData the only interesting string
+            subtypes.
         """
         if parser is None:
             self.parser_class = None
@@ -1140,6 +1232,7 @@
             self.can_be_empty_element = can_be_empty_element
             self.cdata_list_attributes = cdata_list_attributes
             self.preserve_whitespace_tags = preserve_whitespace_tags
+            self.interesting_string_types = interesting_string_types
         else:
             # Set up any substitutions for this tag, such as the charset in a META tag.
             builder.set_up_substitutions(self)
@@ -1160,6 +1253,13 @@
             # Keep track of the names that might cause this tag to be treated as a
             # whitespace-preserved tag.
             self.preserve_whitespace_tags = builder.preserve_whitespace_tags
+
+            if self.name in builder.string_containers:
+                # This sort of tag uses a special string container
+                # subclass for most of its strings, so look for that type.
+                self.interesting_string_types = builder.string_containers[self.name]
+            else:
+                self.interesting_string_types = self.DEFAULT_INTERESTING_STRING_TYPES
             
     parserClass = _alias("parser_class")  # BS3
 
@@ -1226,65 +1326,45 @@
         self.clear()
         self.append(string.__class__(string))
 
-    def _all_strings(self, strip=False, types=(NavigableString, CData)):
+    DEFAULT_INTERESTING_STRING_TYPES = (NavigableString, CData)
+    def _all_strings(self, strip=False, types=PageElement.default):
         """Yield all strings of certain classes, possibly stripping them.
 
         :param strip: If True, all strings will be stripped before being
             yielded.
 
-        :types: A tuple of NavigableString subclasses. Any strings of
+        :param types: A tuple of NavigableString subclasses. Any strings of
             a subclass not found in this list will be ignored. By
-            default, this means only NavigableString and CData objects
-            will be considered. So no comments, processing instructions,
-            etc.
+            default, the subclasses considered are the ones found in
+            self.interesting_string_types. If that's not specified,
+            only NavigableString and CData objects will be
+            considered. That means no comments, processing
+            instructions, etc.
 
         :yield: A sequence of strings.
+
         """
+        if types is self.default:
+            types = self.interesting_string_types
+
         for descendant in self.descendants:
-            if (
-                (types is None and not isinstance(descendant, NavigableString))
-                or
-                (types is not None and type(descendant) not in types)):
+            if (types is None and not isinstance(descendant, NavigableString)):
+                continue
+            descendant_type = type(descendant)
+            if isinstance(types, type):
+                if descendant_type is not types:
+                    # We're not interested in strings of this type.
+                    continue
+            elif types is not None and descendant_type not in types:
+                # We're not interested in strings of this type.
                 continue
             if strip:
                 descendant = descendant.strip()
                 if len(descendant) == 0:
                     continue
             yield descendant
-
     strings = property(_all_strings)
 
-    @property
-    def stripped_strings(self):
-        """Yield all strings in the document, stripping them first.
-
-        :yield: A sequence of stripped strings.
-        """
-        for string in self._all_strings(True):
-            yield string
-
-    def get_text(self, separator=u"", strip=False,
-                 types=(NavigableString, CData)):
-        """Get all child strings, concatenated using the given separator.
-
-        :param separator: Strings will be concatenated using this separator.
-
-        :param strip: If True, strings will be stripped before being
-            concatenated.
-
-        :types: A tuple of NavigableString subclasses. Any strings of
-            a subclass not found in this list will be ignored. By
-            default, this means only NavigableString and CData objects
-            will be considered. So no comments, processing instructions,
-            stylesheets, etc.
-
-        :return: A string.
-        """
-        return separator.join([s for s in self._all_strings(
-                    strip, types=types)])
-    getText = get_text
-    text = property(get_text)
-
     def decompose(self):
         """Recursively destroys this PageElement and its children.
 
@@ -1416,7 +1496,7 @@
     def __contains__(self, x):
         return x in self.contents
 
-    def __nonzero__(self):
+    def __bool__(self):
         "A tag is non-None even if it has no contents."
         return True
 
@@ -1565,8 +1645,8 @@
             else:
                 if isinstance(val, list) or isinstance(val, tuple):
                     val = ' '.join(val)
-                elif not isinstance(val, basestring):
-                    val = unicode(val)
+                elif not isinstance(val, str):
+                    val = str(val)
                 elif (
                         isinstance(val, AttributeValueWithCharsetSubstitution)
                         and eventual_encoding is not None
@@ -1575,7 +1655,7 @@
 
                 text = formatter.attribute_value(val)
                 decoded = (
-                    unicode(key) + '='
+                    str(key) + '='
                     + formatter.quoted_attribute_value(text))
             attrs.append(decoded)
         close = ''
@@ -1934,7 +2014,7 @@
             else:
                 attrs = kwargs
         normalized_attrs = {}
-        for key, value in attrs.items():
+        for key, value in list(attrs.items()):
             normalized_attrs[key] = self._normalize_search_value(value)
 
         self.attrs = normalized_attrs
@@ -1943,7 +2023,7 @@
     def _normalize_search_value(self, value):
         # Leave it alone if it's a Unicode string, a callable, a
         # regular expression, a boolean, or None.
-        if (isinstance(value, unicode) or isinstance(value, Callable) or hasattr(value, 'match')
+        if (isinstance(value, str) or isinstance(value, Callable) or hasattr(value, 'match')
             or isinstance(value, bool) or value is None):
             return value
 
@@ -1956,7 +2036,7 @@
             new_value = []
             for v in value:
                 if (hasattr(v, '__iter__') and not isinstance(v, bytes)
-                    and not isinstance(v, unicode)):
+                    and not isinstance(v, str)):
                     # This is almost certainly the user's mistake. In the
                     # interests of avoiding infinite loops, we'll let
                     # it through as-is rather than doing a recursive call.
@@ -1968,7 +2048,7 @@
         # Otherwise, convert it into a Unicode string.
         # The unicode(str()) thing is so this will do the same thing on Python 2
         # and Python 3.
-        return unicode(str(value))
+        return str(str(value))
 
     def __str__(self):
         """A human-readable representation of this SoupStrainer."""
@@ -1996,7 +2076,7 @@
             markup = markup_name
             markup_attrs = markup
 
-        if isinstance(self.name, basestring):
+        if isinstance(self.name, str):
             # Optimization for a very common case where the user is
             # searching for a tag with one specific name, and we're
             # looking at a tag with a different name.
@@ -2052,7 +2132,7 @@
         found = None
         # If given a list of items, scan it for a text element that
         # matches.
-        if hasattr(markup, '__iter__') and not isinstance(markup, (Tag, basestring)):
+        if hasattr(markup, '__iter__') and not isinstance(markup, (Tag, str)):
             for element in markup:
                 if isinstance(element, NavigableString) \
                        and self.search(element):
@@ -2065,7 +2145,7 @@
                 found = self.search_tag(markup)
         # If it's text, make sure the text matches.
         elif isinstance(markup, NavigableString) or \
-                 isinstance(markup, basestring):
+                 isinstance(markup, str):
             if not self.name and not self.attrs and self._matches(markup, self.text):
                 found = markup
         else:
@@ -2110,7 +2190,7 @@
             return not match_against
 
         if (hasattr(match_against, '__iter__')
-            and not isinstance(match_against, basestring)):
+            and not isinstance(match_against, str)):
             # We're asked to match against an iterable of items.
             # The markup must be match at least one item in the
             # iterable. We'll try each one in turn.
@@ -2137,7 +2217,7 @@
         # the tag's name and once against its prefixed name.
         match = False
         
-        if not match and isinstance(match_against, unicode):
+        if not match and isinstance(match_against, str):
             # Exact string match
             match = markup == match_against
 
diff -Nru beautifulsoup4-4.9.3/bs4/formatter.py beautifulsoup4-4.10.0/bs4/formatter.py
--- beautifulsoup4-4.9.3/bs4/formatter.py	2019-12-29 15:48:17.000000000 +0000
+++ beautifulsoup4-4.10.0/bs4/formatter.py	2021-09-07 23:36:46.000000000 +0000
@@ -14,7 +14,8 @@
 
     For HTML documents:
      * 'html' - HTML entity substitution for generic HTML documents. (default)
-     * 'html5' - HTML entity substitution for HTML5 documents.
+     * 'html5' - HTML entity substitution for HTML5 documents, as
+                 well as some optimizations in the way tags are rendered.
      * 'minimal' - Only make the substitutions necessary to guarantee
                    valid HTML.
      * None - Do not perform any substitution. This will be faster
@@ -48,6 +49,7 @@
     def __init__(
             self, language=None, entity_substitution=None,
             void_element_close_prefix='/', cdata_containing_tags=None,
+            empty_attributes_are_booleans=False,
     ):
         """Constructor.
 
@@ -64,6 +66,9 @@
            as containing CDATA in this dialect. For example, in HTML,
            <script> and <style> tags are defined as containing CDATA,
            and their contents should not be formatted.
+        :param empty_attributes_are_booleans: Render attributes whose value
+            is the empty string as HTML-style boolean attributes.
+            (Attributes whose value is None are always rendered this way.)
         """
         self.language = language
         self.entity_substitution = entity_substitution
@@ -71,7 +76,8 @@
         self.cdata_containing_tags = self._default(
             language, cdata_containing_tags, 'cdata_containing_tags'
         )
-            
+        self.empty_attributes_are_booleans=empty_attributes_are_booleans
+        
     def substitute(self, ns):
         """Process a string that needs to undergo entity substitution.
         This may be a string encountered in an attribute value or as
@@ -83,7 +89,7 @@
         """
         if not self.entity_substitution:
             return ns
-        from element import NavigableString
+        from .element import NavigableString
         if (isinstance(ns, NavigableString)
             and ns.parent is not None
             and ns.parent.name in self.cdata_containing_tags):
@@ -107,11 +113,17 @@
         By default, attributes are sorted alphabetically. This makes
         behavior consistent between Python 2 and Python 3, and preserves
         backwards compatibility with older versions of Beautiful Soup.
+
+        If `empty_attributes_are_booleans` is True, then attributes whose
+        values are set to the empty string will be treated as boolean
+        attributes.
         """
         if tag.attrs is None:
             return []
-        return sorted(tag.attrs.items())
-
+        return sorted(
+            (k, (None if self.empty_attributes_are_booleans and v == '' else v))
+            for k, v in list(tag.attrs.items())
+        )
    
 class HTMLFormatter(Formatter):
     """A generic Formatter for HTML."""
@@ -133,7 +145,8 @@
 )
 HTMLFormatter.REGISTRY["html5"] = HTMLFormatter(
     entity_substitution=EntitySubstitution.substitute_html,
-    void_element_close_prefix = None
+    void_element_close_prefix=None,
+    empty_attributes_are_booleans=True,
 )
 HTMLFormatter.REGISTRY["minimal"] = HTMLFormatter(
     entity_substitution=EntitySubstitution.substitute_xml
diff -Nru beautifulsoup4-4.9.3/bs4/__init__.py beautifulsoup4-4.10.0/bs4/__init__.py
--- beautifulsoup4-4.9.3/bs4/__init__.py	2020-10-03 15:30:53.000000000 +0000
+++ beautifulsoup4-4.10.0/bs4/__init__.py	2021-09-08 00:03:32.000000000 +0000
@@ -7,7 +7,7 @@
 provides methods and Pythonic idioms that make it easy to navigate,
 search, and modify the parse tree.
 
-Beautiful Soup works with Python 2.7 and up. It works better if lxml
+Beautiful Soup works with Python 3.5 and up. It works better if lxml
 and/or html5lib is installed.
 
 For more than you ever wanted to know about Beautiful Soup, see the
@@ -15,13 +15,14 @@
 """
 
 __author__ = "Leonard Richardson (leonardr@segfault.org)"
-__version__ = "4.9.3"
-__copyright__ = "Copyright (c) 2004-2020 Leonard Richardson"
+__version__ = "4.10.0"
+__copyright__ = "Copyright (c) 2004-2021 Leonard Richardson"
 # Use of this source code is governed by the MIT license.
 __license__ = "MIT"
 
 __all__ = ['BeautifulSoup']
 
+
 from collections import Counter
 import os
 import re
@@ -29,6 +30,11 @@
 import traceback
 import warnings
 
+# The very first thing we do is give a useful error if someone is
+# running this code under Python 2.
+if sys.version_info.major < 3:
+    raise ImportError('You are trying to use a Python 3-specific version of Beautiful Soup under Python 2. This will not work. The final version of Beautiful Soup to support Python 2 was 4.9.3.')
+
 from .builder import builder_registry, ParserRejectedMarkup
 from .dammit import UnicodeDammit
 from .element import (
@@ -49,10 +55,6 @@
     TemplateString,
     )
 
-# The very first thing we do is give a useful error if someone is
-# running this code under Python 3 without converting it.
-'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work.'<>'You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).'
-
 # Define some custom warnings.
 class GuessedAtParserWarning(UserWarning):
     """The warning issued when BeautifulSoup has to guess what parser to
@@ -100,7 +102,7 @@
     # Since BeautifulSoup subclasses Tag, it's possible to treat it as
     # a Tag with a .name. This name makes it clear the BeautifulSoup
     # object isn't a real markup tag.
-    ROOT_TAG_NAME = u'[document]'
+    ROOT_TAG_NAME = '[document]'
 
     # If the end-user gives no indication which tree builder they
     # want, look for one with these features.
@@ -217,7 +219,7 @@
         from_encoding = from_encoding or deprecated_argument(
             "fromEncoding", "from_encoding")
 
-        if from_encoding and isinstance(markup, unicode):
+        if from_encoding and isinstance(markup, str):
             warnings.warn("You provided Unicode markup but also provided a value for from_encoding. Your from_encoding will be ignored.")
             from_encoding = None
 
@@ -234,7 +236,7 @@
             builder_class = builder
             builder = None
         elif builder is None:
-            if isinstance(features, basestring):
+            if isinstance(features, str):
                 features = [features]
             if features is None or len(features) == 0:
                 features = self.DEFAULT_BUILDER_FEATURES
@@ -309,26 +311,38 @@
             markup = markup.read()
         elif len(markup) <= 256 and (
                 (isinstance(markup, bytes) and not b'<' in markup)
-                or (isinstance(markup, unicode) and not u'<' in markup)
+                or (isinstance(markup, str) and not '<' in markup)
         ):
             # Print out warnings for a couple beginner problems
             # involving passing non-markup to Beautiful Soup.
             # Beautiful Soup will still parse the input as markup,
             # just in case that's what the user really wants.
-            if (isinstance(markup, unicode)
+            if (isinstance(markup, str)
                 and not os.path.supports_unicode_filenames):
                 possible_filename = markup.encode("utf8")
             else:
                 possible_filename = markup
             is_file = False
+            is_directory = False
             try:
                 is_file = os.path.exists(possible_filename)
-            except Exception, e:
+                if is_file:
+                    is_directory = os.path.isdir(possible_filename)
+            except Exception as e:
                 # This is almost certainly a problem involving
                 # characters not valid in filenames on this
                 # system. Just let it go.
                 pass
-            if is_file:
+            if is_directory:
+                warnings.warn(
+                    '"%s" looks like a directory name, not markup. You may'
+                    ' want to open a file found in this directory and pass'
+                    ' the filehandle into Beautiful Soup.' % (
+                        self._decode_markup(markup)
+                    ),
+                    MarkupResemblesLocatorWarning
+                )
+            elif is_file:
                 warnings.warn(
                     '"%s" looks like a filename, not markup. You should'
                     ' probably open this file and pass the filehandle into'
@@ -353,9 +367,9 @@
                 pass
 
         if not success:
-            other_exceptions = [unicode(e) for e in rejections]
+            other_exceptions = [str(e) for e in rejections]
             raise ParserRejectedMarkup(
-                u"The markup you provided was rejected by the parser. Trying a different parser or a different encoding may help.\n\nOriginal exception(s) from parser:\n " + "\n ".join(other_exceptions)
+                "The markup you provided was rejected by the parser. Trying a different parser or a different encoding may help.\n\nOriginal exception(s) from parser:\n " + "\n ".join(other_exceptions)
             )
 
         # Clear out the markup and remove the builder's circular
@@ -406,9 +420,9 @@
         if isinstance(markup, bytes):
             space = b' '
             cant_start_with = (b"http:", b"https:")
-        elif isinstance(markup, unicode):
-            space = u' '
-            cant_start_with = (u"http:", u"https:")
+        elif isinstance(markup, str):
+            space = ' '
+            cant_start_with = ("http:", "https:")
         else:
             return
 
@@ -485,7 +499,7 @@
 
         # On top of that, we may be inside a tag that needs a special
         # container class.
-        if self.string_container_stack:
+        if self.string_container_stack and container is NavigableString:
             container = self.builder.string_containers.get(
                 self.string_container_stack[-1].name, container
             )
@@ -541,11 +555,9 @@
     def endData(self, containerClass=None):
         """Method called by the TreeBuilder when the end of a data segment
         occurs.
-        """
-        containerClass = self.string_container(containerClass)
-        
+        """       
         if self.current_data:
-            current_data = u''.join(self.current_data)
+            current_data = ''.join(self.current_data)
             # If whitespace is not preserved, and this string contains
             # nothing but ASCII spaces, replace it with a single space
             # or newline.
@@ -570,6 +582,7 @@
                     not self.parse_only.search(current_data)):
                 return
 
+            containerClass = self.string_container(containerClass)
             o = containerClass(current_data)
             self.object_was_parsed(o)
 
@@ -748,9 +761,9 @@
                 eventual_encoding = None
             if eventual_encoding != None:
                 encoding_part = ' encoding="%s"' % eventual_encoding
-            prefix = u'<?xml version="1.0"%s?>\n' % encoding_part
+            prefix = '<?xml version="1.0"%s?>\n' % encoding_part
         else:
-            prefix = u''
+            prefix = ''
         if not pretty_print:
             indent_level = None
         else:
@@ -788,4 +801,4 @@
 if __name__ == '__main__':
     import sys
     soup = BeautifulSoup(sys.stdin)
-    print(soup.prettify())
+    print((soup.prettify()))
diff -Nru beautifulsoup4-4.9.3/bs4/testing.py beautifulsoup4-4.10.0/bs4/testing.py
--- beautifulsoup4-4.9.3/bs4/testing.py	2020-05-30 18:54:41.000000000 +0000
+++ beautifulsoup4-4.10.0/bs4/testing.py	2021-09-07 23:36:46.000000000 +0000
@@ -8,6 +8,7 @@
 import copy
 import functools
 import unittest
+import warnings
 from unittest import TestCase
 from bs4 import BeautifulSoup
 from bs4.element import (
@@ -25,7 +26,7 @@
 from bs4.builder import HTMLParserTreeBuilder
 default_builder = HTMLParserTreeBuilder
 
-BAD_DOCUMENT = u"""A bare string
+BAD_DOCUMENT = """A bare string
 <!DOCTYPE xsl:stylesheet SYSTEM "htmlent.dtd">
 <!DOCTYPE xsl:stylesheet PUBLIC "htmlent.dtd">
 <div><![CDATA[A CDATA section where it doesn't belong]]></div>
@@ -94,7 +95,7 @@
         # Verify that every tag that was opened was eventually closed.
 
         # There are no tags in the open tag counter.
-        assert all(v==0 for v in obj.open_tag_counter.values())
+        assert all(v==0 for v in list(obj.open_tag_counter.values()))
 
         # The only tag in the tag stack is the one for the root
         # document.
@@ -228,7 +229,41 @@
             return child
 
 
-class HTMLTreeBuilderSmokeTest(object):
+class TreeBuilderSmokeTest(object):
+    # Tests that are common to HTML and XML tree builders.
+
+    def test_fuzzed_input(self):
+        # This test centralizes in one place the various fuzz tests
+        # for Beautiful Soup created by the oss-fuzz project.
+        
+        # These strings superficially resemble markup, but they
+        # generally can't be parsed into anything. The best we can
+        # hope for is that parsing these strings won't crash the
+        # parser.
+        #
+        # n.b. This markup is commented out because these fuzz tests
+        # _do_ crash the parser. However the crashes are due to bugs
+        # in html.parser, not Beautiful Soup -- otherwise I'd fix the
+        # bugs!
+        
+        bad_markup = [
+            # https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=28873
+            # https://github.com/guidovranken/python-library-fuzzers/blob/master/corp-html/519e5b4269a01185a0d5e76295251921da2f0700
+            # https://bugs.python.org/issue37747
+            #
+            #b'\n<![\xff\xfe\xfe\xcd\x00',
+
+            #https://github.com/guidovranken/python-library-fuzzers/blob/master/corp-html/de32aa55785be29bbc72a1a8e06b00611fb3d9f8
+            # https://bugs.python.org/issue34480
+            #
+            #b'<![n\x00'
+        ]
+        for markup in bad_markup:
+            with warnings.catch_warnings(record=False):
+                soup = self.soup(markup)
+        
+
+class HTMLTreeBuilderSmokeTest(TreeBuilderSmokeTest):
 
     """A basic test of a treebuilder's competence.
 
@@ -372,7 +407,7 @@
         # process_markup correctly sets processing_instruction_class
         # even when the markup is already Unicode and there is no
         # need to process anything.
-        markup = u"""<?PITarget PIContent?>"""
+        markup = """<?PITarget PIContent?>"""
         soup = self.soup(markup)
         self.assertEqual(markup, soup.decode())
 
@@ -544,14 +579,14 @@
         # "&T" and "&p" look like incomplete character entities, but they are
         # not.
         self.assertSoupEquals(
-            u"<p>&bull; AT&T is in the s&p 500</p>",
-            u"<p>\u2022 AT&amp;T is in the s&amp;p 500</p>"
+            "<p>&bull; AT&T is in the s&p 500</p>",
+            "<p>\u2022 AT&amp;T is in the s&amp;p 500</p>"
         )
 
     def test_apos_entity(self):
         self.assertSoupEquals(
-            u"<p>Bob&apos;s Bar</p>",
-            u"<p>Bob's Bar</p>",
+            "<p>Bob&apos;s Bar</p>",
+            "<p>Bob's Bar</p>",
         )
         
     def test_entities_in_foreign_document_encoding(self):
@@ -564,17 +599,17 @@
         # characters.
         markup = "<p>&#147;Hello&#148; &#45;&#9731;</p>"
         soup = self.soup(markup)
-        self.assertEquals(u"“Hello” -☃", soup.p.string)
+        self.assertEqual("“Hello” -☃", soup.p.string)
         
     def test_entities_in_attributes_converted_to_unicode(self):
-        expect = u'<p id="pi\N{LATIN SMALL LETTER N WITH TILDE}ata"></p>'
+        expect = '<p id="pi\N{LATIN SMALL LETTER N WITH TILDE}ata"></p>'
         self.assertSoupEquals('<p id="pi&#241;ata"></p>', expect)
         self.assertSoupEquals('<p id="pi&#xf1;ata"></p>', expect)
         self.assertSoupEquals('<p id="pi&#Xf1;ata"></p>', expect)
         self.assertSoupEquals('<p id="pi&ntilde;ata"></p>', expect)
 
     def test_entities_in_text_converted_to_unicode(self):
-        expect = u'<p>pi\N{LATIN SMALL LETTER N WITH TILDE}ata</p>'
+        expect = '<p>pi\N{LATIN SMALL LETTER N WITH TILDE}ata</p>'
         self.assertSoupEquals("<p>pi&#241;ata</p>", expect)
         self.assertSoupEquals("<p>pi&#xf1;ata</p>", expect)
         self.assertSoupEquals("<p>pi&#Xf1;ata</p>", expect)
@@ -585,11 +620,11 @@
                               '<p>I said "good day!"</p>')
 
     def test_out_of_range_entity(self):
-        expect = u"\N{REPLACEMENT CHARACTER}"
+        expect = "\N{REPLACEMENT CHARACTER}"
         self.assertSoupEquals("&#10000000000000;", expect)
         self.assertSoupEquals("&#x10000000000000;", expect)
         self.assertSoupEquals("&#1000000000;", expect)
-        
+       
     def test_multipart_strings(self):
         "Mostly to prevent a recurrence of a bug in the html5lib treebuilder."
         soup = self.soup("<html><h2>\nfoo</h2><p></p></html>")
@@ -651,7 +686,7 @@
         markup = b'<a class="foo bar">'
         soup = self.soup(markup)
         self.assertEqual(['foo', 'bar'], soup.a['class'])
-
+        
     #
     # Generally speaking, tests below this point are more tests of
     # Beautiful Soup than tests of the tree builders. But parsers are
@@ -663,9 +698,9 @@
         # A seemingly innocuous document... but it's in Unicode! And
         # it contains characters that can't be represented in the
         # encoding found in the  declaration! The horror!
-        markup = u'<html><head><meta encoding="euc-jp"></head><body>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</body>'
+        markup = '<html><head><meta encoding="euc-jp"></head><body>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</body>'
         soup = self.soup(markup)
-        self.assertEqual(u'Sacr\xe9 bleu!', soup.body.string)
+        self.assertEqual('Sacr\xe9 bleu!', soup.body.string)
 
     def test_soupstrainer(self):
         """Parsers should be able to work with SoupStrainers."""
@@ -705,7 +740,7 @@
         # Both XML and HTML entities are converted to Unicode characters
         # during parsing.
         text = "<p>&lt;&lt;sacr&eacute;&#32;bleu!&gt;&gt;</p>"
-        expected = u"<p>&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</p>"
+        expected = "<p>&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</p>"
         self.assertSoupEquals(text, expected)
 
     def test_smart_quotes_converted_on_the_way_in(self):
@@ -715,15 +750,15 @@
         soup = self.soup(quote)
         self.assertEqual(
             soup.p.string,
-            u"\N{LEFT SINGLE QUOTATION MARK}Foo\N{RIGHT SINGLE QUOTATION MARK}")
+            "\N{LEFT SINGLE QUOTATION MARK}Foo\N{RIGHT SINGLE QUOTATION MARK}")
 
     def test_non_breaking_spaces_converted_on_the_way_in(self):
         soup = self.soup("<a>&nbsp;&nbsp;</a>")
-        self.assertEqual(soup.a.string, u"\N{NO-BREAK SPACE}" * 2)
+        self.assertEqual(soup.a.string, "\N{NO-BREAK SPACE}" * 2)
 
     def test_entities_converted_on_the_way_out(self):
         text = "<p>&lt;&lt;sacr&eacute;&#32;bleu!&gt;&gt;</p>"
-        expected = u"<p>&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</p>".encode("utf-8")
+        expected = "<p>&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</p>".encode("utf-8")
         soup = self.soup(text)
         self.assertEqual(soup.p.encode("utf-8"), expected)
 
@@ -732,7 +767,7 @@
         # easy-to-understand document.
 
         # Here it is in Unicode. Note that it claims to be in ISO-Latin-1.
-        unicode_html = u'<html><head><meta content="text/html; charset=ISO-Latin-1" http-equiv="Content-type"/></head><body><p>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</p></body></html>'
+        unicode_html = '<html><head><meta content="text/html; charset=ISO-Latin-1" http-equiv="Content-type"/></head><body><p>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</p></body></html>'
 
         # That's because we're going to encode it into ISO-Latin-1, and use
         # that to test.
@@ -848,8 +883,8 @@
             soup = self.soup(markup)
             for encoding in PYTHON_SPECIFIC_ENCODINGS:
                 if encoding in (
-                    u'idna', u'mbcs', u'oem', u'undefined',
-                    u'string_escape', u'string-escape'
+                    'idna', 'mbcs', 'oem', 'undefined',
+                    'string_escape', 'string-escape'
                 ):
                     # For one reason or another, these will raise an
                     # exception if we actually try to use them, so don't
@@ -881,7 +916,7 @@
         self.linkage_validator(soup)
 
 
-class XMLTreeBuilderSmokeTest(object):
+class XMLTreeBuilderSmokeTest(TreeBuilderSmokeTest):
 
     def test_pickle_and_unpickle_identity(self):
         # Pickling a tree, then unpickling it, yields a tree identical
@@ -910,8 +945,8 @@
         soup = self.soup(markup)
         for encoding in PYTHON_SPECIFIC_ENCODINGS:
             if encoding in (
-                u'idna', u'mbcs', u'oem', u'undefined',
-                u'string_escape', u'string-escape'
+                'idna', 'mbcs', 'oem', 'undefined',
+                'string_escape', 'string-escape'
             ):
                 # For one reason or another, these will raise an
                 # exception if we actually try to use them, so don't
@@ -962,15 +997,15 @@
         self.assertTrue(b"&lt; &lt; hey &gt; &gt;" in encoded)
 
     def test_can_parse_unicode_document(self):
-        markup = u'<?xml version="1.0" encoding="euc-jp"><root>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</root>'
+        markup = '<?xml version="1.0" encoding="euc-jp"><root>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</root>'
         soup = self.soup(markup)
-        self.assertEqual(u'Sacr\xe9 bleu!', soup.root.string)
+        self.assertEqual('Sacr\xe9 bleu!', soup.root.string)
 
     def test_popping_namespaced_tag(self):
         markup = '<rss xmlns:dc="foo"><dc:creator>b</dc:creator><dc:date>2012-07-02T20:33:42Z</dc:date><dc:rights>c</dc:rights><image>d</image></rss>'
         soup = self.soup(markup)
         self.assertEqual(
-            unicode(soup.rss), markup)
+            str(soup.rss), markup)
 
     def test_docstring_includes_correct_encoding(self):
         soup = self.soup("<root/>")
@@ -1001,17 +1036,17 @@
     def test_closing_namespaced_tag(self):
         markup = '<p xmlns:dc="http://purl.org/dc/elements/1.1/"><dc:date>20010504</dc:date></p>'
         soup = self.soup(markup)
-        self.assertEqual(unicode(soup.p), markup)
+        self.assertEqual(str(soup.p), markup)
 
     def test_namespaced_attributes(self):
         markup = '<foo xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><bar xsi:schemaLocation="http://www.example.com"/></foo>'
         soup = self.soup(markup)
-        self.assertEqual(unicode(soup.foo), markup)
+        self.assertEqual(str(soup.foo), markup)
 
     def test_namespaced_attributes_xml_namespace(self):
         markup = '<foo xml:lang="fr">bar</foo>'
         soup = self.soup(markup)
-        self.assertEqual(unicode(soup.foo), markup)
+        self.assertEqual(str(soup.foo), markup)
 
     def test_find_by_prefixed_name(self):
         doc = """<?xml version="1.0" encoding="utf-8"?>
diff -Nru beautifulsoup4-4.9.3/bs4/tests/test_html5lib.py beautifulsoup4-4.10.0/bs4/tests/test_html5lib.py
--- beautifulsoup4-4.9.3/bs4/tests/test_html5lib.py	2020-04-05 19:54:12.000000000 +0000
+++ beautifulsoup4-4.10.0/bs4/tests/test_html5lib.py	2021-09-07 23:36:47.000000000 +0000
@@ -5,7 +5,7 @@
 try:
     from bs4.builder import HTML5TreeBuilder
     HTML5LIB_PRESENT = True
-except ImportError, e:
+except ImportError as e:
     HTML5LIB_PRESENT = False
 from bs4.element import SoupStrainer
 from bs4.testing import (
@@ -74,14 +74,14 @@
     def test_reparented_markup(self):
         markup = '<p><em>foo</p>\n<p>bar<a></a></em></p>'
         soup = self.soup(markup)
-        self.assertEqual(u"<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p></body>", soup.body.decode())
+        self.assertEqual("<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p></body>", soup.body.decode())
         self.assertEqual(2, len(soup.find_all('p')))
 
 
     def test_reparented_markup_ends_with_whitespace(self):
         markup = '<p><em>foo</p>\n<p>bar<a></a></em></p>\n'
         soup = self.soup(markup)
-        self.assertEqual(u"<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p>\n</body>", soup.body.decode())
+        self.assertEqual("<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p>\n</body>", soup.body.decode())
         self.assertEqual(2, len(soup.find_all('p')))
 
     def test_reparented_markup_containing_identical_whitespace_nodes(self):
@@ -127,7 +127,7 @@
     def test_foster_parenting(self):
         markup = b"""<table><td></tbody>A"""
         soup = self.soup(markup)
-        self.assertEqual(u"<body>A<table><tbody><tr><td></td></tr></tbody></table></body>", soup.body.decode())
+        self.assertEqual("<body>A<table><tbody><tr><td></td></tr></tbody></table></body>", soup.body.decode())
 
     def test_extraction(self):
         """
@@ -188,3 +188,39 @@
         # because there's no way of knowing, when a string is created,
         # where in the tree it will eventually end up.
         pass
+
+    def test_html5_attributes(self):
+        # The html5lib TreeBuilder can convert any entity named in
+        # the HTML5 spec to a sequence of Unicode characters, and
+        # convert those Unicode characters to a (potentially
+        # different) named entity on the way out.
+        #
+        # This is a copy of the same test from
+        # HTMLParserTreeBuilderSmokeTest.  It's not in the superclass
+        # because the lxml HTML TreeBuilder _doesn't_ work this way.
+        for input_element, output_unicode, output_element in (
+                ("&RightArrowLeftArrow;", '\u21c4', b'&rlarr;'),
+                ('&models;', '\u22a7', b'&models;'),
+                ('&Nfr;', '\U0001d511', b'&Nfr;'),
+                ('&ngeqq;', '\u2267\u0338', b'&ngeqq;'),
+                ('&not;', '\xac', b'&not;'),
+                ('&Not;', '\u2aec', b'&Not;'),
+                ('&quot;', '"', b'"'),
+                ('&there4;', '\u2234', b'&there4;'),
+                ('&Therefore;', '\u2234', b'&there4;'),
+                ('&therefore;', '\u2234', b'&there4;'),
+                ("&fjlig;", 'fj', b'fj'),                
+                ("&sqcup;", '\u2294', b'&sqcup;'),
+                ("&sqcups;", '\u2294\ufe00', b'&sqcups;'),
+                ("&apos;", "'", b"'"),
+                ("&verbar;", "|", b"|"),
+        ):
+            markup = '<div>%s</div>' % input_element
+            div = self.soup(markup).div
+            without_element = div.encode()
+            expect = b"<div>%s</div>" % output_unicode.encode("utf8")
+            self.assertEqual(without_element, expect)
+
+            with_element = div.encode(formatter="html")
+            expect = b"<div>%s</div>" % output_element
+            self.assertEqual(with_element, expect)
diff -Nru beautifulsoup4-4.9.3/bs4/tests/test_htmlparser.py beautifulsoup4-4.10.0/bs4/tests/test_htmlparser.py
--- beautifulsoup4-4.9.3/bs4/tests/test_htmlparser.py	2020-05-17 16:18:11.000000000 +0000
+++ beautifulsoup4-4.10.0/bs4/tests/test_htmlparser.py	2021-09-07 23:36:47.000000000 +0000
@@ -3,6 +3,7 @@
 
 from pdb import set_trace
 import pickle
+import warnings
 from bs4.testing import SoupTest, HTMLTreeBuilderSmokeTest
 from bs4.builder import HTMLParserTreeBuilder
 from bs4.builder._htmlparser import BeautifulSoupHTMLParser
@@ -60,20 +61,20 @@
         # If you don't provide any particular value for
         # on_duplicate_attribute, later values replace earlier values.
         soup = self.soup(markup)
-        self.assertEquals("url3", soup.a['href'])
-        self.assertEquals(["cls"], soup.a['class'])
-        self.assertEquals("id", soup.a['id'])
+        self.assertEqual("url3", soup.a['href'])
+        self.assertEqual(["cls"], soup.a['class'])
+        self.assertEqual("id", soup.a['id'])
         
         # You can also get this behavior explicitly.
         def assert_attribute(on_duplicate_attribute, expected):
             soup = self.soup(
                 markup, on_duplicate_attribute=on_duplicate_attribute
             )
-            self.assertEquals(expected, soup.a['href'])
+            self.assertEqual(expected, soup.a['href'])
 
             # Verify that non-duplicate attributes are treated normally.
-            self.assertEquals(["cls"], soup.a['class'])
-            self.assertEquals("id", soup.a['id'])
+            self.assertEqual(["cls"], soup.a['class'])
+            self.assertEqual("id", soup.a['id'])
         assert_attribute(None, "url3")
         assert_attribute(BeautifulSoupHTMLParser.REPLACE, "url3")
 
@@ -87,6 +88,38 @@
             attrs[key].append(value)
         assert_attribute(accumulate, ["url1", "url2", "url3"])            
 
+    def test_html5_attributes(self):
+        # The html.parser TreeBuilder can convert any entity named in
+        # the HTML5 spec to a sequence of Unicode characters, and
+        # convert those Unicode characters to a (potentially
+        # different) named entity on the way out.
+        for input_element, output_unicode, output_element in (
+                ("&RightArrowLeftArrow;", '\u21c4', b'&rlarr;'),
+                ('&models;', '\u22a7', b'&models;'),
+                ('&Nfr;', '\U0001d511', b'&Nfr;'),
+                ('&ngeqq;', '\u2267\u0338', b'&ngeqq;'),
+                ('&not;', '\xac', b'&not;'),
+                ('&Not;', '\u2aec', b'&Not;'),
+                ('&quot;', '"', b'"'),
+                ('&there4;', '\u2234', b'&there4;'),
+                ('&Therefore;', '\u2234', b'&there4;'),
+                ('&therefore;', '\u2234', b'&there4;'),
+                ("&fjlig;", 'fj', b'fj'),                
+                ("&sqcup;", '\u2294', b'&sqcup;'),
+                ("&sqcups;", '\u2294\ufe00', b'&sqcups;'),
+                ("&apos;", "'", b"'"),
+                ("&verbar;", "|", b"|"),
+        ):
+            markup = '<div>%s</div>' % input_element
+            div = self.soup(markup).div
+            without_element = div.encode()
+            expect = b"<div>%s</div>" % output_unicode.encode("utf8")
+            self.assertEqual(without_element, expect)
+
+            with_element = div.encode(formatter="html")
+            expect = b"<div>%s</div>" % output_element
+            self.assertEqual(with_element, expect)
+
 
 class TestHTMLParserSubclass(SoupTest):
     def test_error(self):
@@ -94,4 +127,8 @@
         that doesn't cause a crash.
         """
         parser = BeautifulSoupHTMLParser()
-        parser.error("don't crash")
+        with warnings.catch_warnings(record=True) as warns:
+            parser.error("don't crash")
+        [warning] = warns
+        assert "don't crash" == str(warning.message)
+
diff -Nru beautifulsoup4-4.9.3/bs4/tests/test_lxml.py beautifulsoup4-4.10.0/bs4/tests/test_lxml.py
--- beautifulsoup4-4.9.3/bs4/tests/test_lxml.py	2020-04-05 19:54:12.000000000 +0000
+++ beautifulsoup4-4.10.0/bs4/tests/test_lxml.py	2021-09-07 23:36:47.000000000 +0000
@@ -7,7 +7,7 @@
     import lxml.etree
     LXML_PRESENT = True
     LXML_VERSION = lxml.etree.LXML_VERSION
-except ImportError, e:
+except ImportError as e:
     LXML_PRESENT = False
     LXML_VERSION = (0,)
 
@@ -45,7 +45,7 @@
             "<p>foo&#x10000000000000;bar</p>", "<p>foobar</p>")
         self.assertSoupEquals(
             "<p>foo&#1000000000;bar</p>", "<p>foobar</p>")
-
+        
     def test_entities_in_foreign_document_encoding(self):
         # We can't implement this case correctly because by the time we
         # hear about markup like "&#147;", it's been (incorrectly) converted into
@@ -68,7 +68,7 @@
         # if one is installed.
         with warnings.catch_warnings(record=True) as w:
             soup = BeautifulStoneSoup("<b />")
-        self.assertEqual(u"<b/>", unicode(soup.b))
+        self.assertEqual("<b/>", str(soup.b))
         self.assertTrue("BeautifulStoneSoup class is deprecated" in str(w[0].message))
 
     def test_tracking_line_numbers(self):
diff -Nru beautifulsoup4-4.9.3/bs4/tests/test_soup.py beautifulsoup4-4.10.0/bs4/tests/test_soup.py
--- beautifulsoup4-4.9.3/bs4/tests/test_soup.py	2020-04-21 12:12:33.000000000 +0000
+++ beautifulsoup4-4.10.0/bs4/tests/test_soup.py	2021-09-07 23:36:47.000000000 +0000
@@ -3,6 +3,7 @@
 
 from pdb import set_trace
 import logging
+import os
 import unittest
 import sys
 import tempfile
@@ -31,7 +32,6 @@
 from bs4.dammit import (
     EntitySubstitution,
     UnicodeDammit,
-    EncodingDetector,
 )
 from bs4.testing import (
     default_builder,
@@ -51,17 +51,17 @@
 class TestConstructor(SoupTest):
 
     def test_short_unicode_input(self):
-        data = u"<h1>éé</h1>"
+        data = "<h1>éé</h1>"
         soup = self.soup(data)
-        self.assertEqual(u"éé", soup.h1.string)
+        self.assertEqual("éé", soup.h1.string)
 
     def test_embedded_null(self):
-        data = u"<h1>foo\0bar</h1>"
+        data = "<h1>foo\0bar</h1>"
         soup = self.soup(data)
-        self.assertEqual(u"foo\0bar", soup.h1.string)
+        self.assertEqual("foo\0bar", soup.h1.string)
 
     def test_exclude_encodings(self):
-        utf8_data = u"Räksmörgås".encode("utf-8")
+        utf8_data = "Räksmörgås".encode("utf-8")
         soup = self.soup(utf8_data, exclude_encodings=["utf-8"])
         self.assertEqual("windows-1252", soup.original_encoding)
 
@@ -127,7 +127,7 @@
             yield markup, None, None, False
             
         import re
-        self.assertRaisesRegexp(
+        self.assertRaisesRegex(
             ParserRejectedMarkup,
             "The markup you provided was rejected by the parser. Trying a different parser or a different encoding may help.",
             BeautifulSoup, '', builder=Mock,
@@ -291,6 +291,21 @@
             soup = self.soup(filename)
         self.assertEqual([], w)
 
+    def test_directory_warning(self):
+        try:
+            filename = tempfile.mkdtemp()
+            with warnings.catch_warnings(record=True) as w:
+                soup = self.soup(filename)
+            warning = self._assert_warning(w, MarkupResemblesLocatorWarning)
+            self.assertTrue("looks like a directory" in str(warning.message))
+        finally:
+            os.rmdir(filename)
+
+        # The directory no longer exists, so Beautiful Soup will no longer issue the warning.
+        with warnings.catch_warnings(record=True) as w:
+            soup = self.soup(filename)
+        self.assertEqual([], w)
+        
     def test_url_warning_with_bytes_url(self):
         with warnings.catch_warnings(record=True) as warning_list:
             soup = self.soup(b"http://www.crummybytes.com/")
@@ -303,7 +318,7 @@
         with warnings.catch_warnings(record=True) as warning_list:
             # note - this url must differ from the bytes one otherwise
             # python's warnings system swallows the second warning
-            soup = self.soup(u"http://www.crummyunicode.com/")
+            soup = self.soup("http://www.crummyunicode.com/")
         warning = self._assert_warning(
             warning_list, MarkupResemblesLocatorWarning
         )
@@ -319,7 +334,7 @@
 
     def test_url_warning_with_unicode_and_space(self):
         with warnings.catch_warnings(record=True) as warning_list:
-            soup = self.soup(u"http://www.crummyuncode.com/ is great")
+            soup = self.soup("http://www.crummyuncode.com/ is great")
         self.assertFalse(any("looks like a URL" in str(w.message) 
             for w in warning_list))
 
@@ -341,9 +356,9 @@
     def test_simple_html_substitution(self):
         # Unicode characters corresponding to named HTML entites
         # are substituted, and no others.
-        s = u"foo\u2200\N{SNOWMAN}\u00f5bar"
+        s = "foo\u2200\N{SNOWMAN}\u00f5bar"
         self.assertEqual(self.sub.substitute_html(s),
-                          u"foo&forall;\N{SNOWMAN}&otilde;bar")
+                          "foo&forall;\N{SNOWMAN}&otilde;bar")
 
     def test_smart_quote_substitution(self):
         # MS smart quotes are a common source of frustration, so we
@@ -353,6 +368,51 @@
         self.assertEqual(self.sub.substitute_html(dammit.markup),
                           "&lsquo;&rsquo;foo&ldquo;&rdquo;")
 
+    def test_html5_entity(self):
+        # Some HTML5 entities correspond to single- or multi-character
+        # Unicode sequences.
+
+        for entity, u in (
+            # A few spot checks of our ability to recognize
+            # special character sequences and convert them
+            # to named entities.
+            ('&models;', '\u22a7'),
+            ('&Nfr;', '\U0001d511'),
+            ('&ngeqq;', '\u2267\u0338'),
+            ('&not;', '\xac'),
+            ('&Not;', '\u2aec'),
+
+            # We _could_ convert | to &verbar;, but we don't, because
+            # | is an ASCII character.
+            ('|', '|'),
+
+            # Similarly for the fj ligature, which we could convert to
+            # &fjlig;, but we don't.
+            ("fj", "fj"),
+
+            # We do convert _these_ ASCII characters to HTML entities,
+            # because that's required to generate valid HTML.
+            ('&gt;', '>'),
+            ('&lt;', '<'),
+            ('&amp;', '&'),
+        ):
+            template = '3 %s 4'
+            raw = template % u
+            with_entities = template % entity
+            self.assertEqual(self.sub.substitute_html(raw), with_entities)
+            
+    def test_html5_entity_with_variation_selector(self):
+        # Some HTML5 entities correspond either to a single-character
+        # Unicode sequence _or_ to the same character plus U+FE00,
+        # VARIATION SELECTOR 1. We can handle this.
+        data = "fjords \u2294 penguins"
+        markup = "fjords &sqcup; penguins"
+        self.assertEqual(self.sub.substitute_html(data), markup)
+
+        data = "fjords \u2294\ufe00 penguins"
+        markup = "fjords &sqcups; penguins"
+        self.assertEqual(self.sub.substitute_html(data), markup)
+        
     def test_xml_converstion_includes_no_quotes_if_make_quoted_attribute_is_false(self):
         s = 'Welcome to "my bar"'
         self.assertEqual(self.sub.substitute_xml(s, False), s)
@@ -408,7 +468,7 @@
 
     def setUp(self):
         super(TestEncodingConversion, self).setUp()
-        self.unicode_data = u'<html><head><meta charset="utf-8"/></head><body><foo>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</foo></body></html>'
+        self.unicode_data = '<html><head><meta charset="utf-8"/></head><body><foo>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</foo></body></html>'
         self.utf8_data = self.unicode_data.encode("utf-8")
         # Just so you know what it looks like.
         self.assertEqual(
@@ -428,7 +488,7 @@
             ascii = b"<foo>a</foo>"
             soup_from_ascii = self.soup(ascii)
             unicode_output = soup_from_ascii.decode()
-            self.assertTrue(isinstance(unicode_output, unicode))
+            self.assertTrue(isinstance(unicode_output, str))
             self.assertEqual(unicode_output, self.document_for(ascii.decode()))
             self.assertEqual(soup_from_ascii.original_encoding.lower(), "utf-8")
         finally:
@@ -440,7 +500,7 @@
         # is not set.
         soup_from_unicode = self.soup(self.unicode_data)
         self.assertEqual(soup_from_unicode.decode(), self.unicode_data)
-        self.assertEqual(soup_from_unicode.foo.string, u'Sacr\xe9 bleu!')
+        self.assertEqual(soup_from_unicode.foo.string, 'Sacr\xe9 bleu!')
         self.assertEqual(soup_from_unicode.original_encoding, None)
 
     def test_utf8_in_unicode_out(self):
@@ -448,7 +508,7 @@
         # attribute is set.
         soup_from_utf8 = self.soup(self.utf8_data)
         self.assertEqual(soup_from_utf8.decode(), self.unicode_data)
-        self.assertEqual(soup_from_utf8.foo.string, u'Sacr\xe9 bleu!')
+        self.assertEqual(soup_from_utf8.foo.string, 'Sacr\xe9 bleu!')
 
     def test_utf8_out(self):
         # The internal data structures can be encoded as UTF-8.
@@ -459,238 +519,29 @@
         PYTHON_3_PRE_3_2,
         "Bad HTMLParser detected; skipping test of non-ASCII characters in attribute name.")
     def test_attribute_name_containing_unicode_characters(self):
-        markup = u'<div><a \N{SNOWMAN}="snowman"></a></div>'
+        markup = '<div><a \N{SNOWMAN}="snowman"></a></div>'
         self.assertEqual(self.soup(markup).div.encode("utf8"), markup.encode("utf8"))
 
-class TestUnicodeDammit(unittest.TestCase):
-    """Standalone tests of UnicodeDammit."""
 
-    def test_unicode_input(self):
-        markup = u"I'm already Unicode! \N{SNOWMAN}"
-        dammit = UnicodeDammit(markup)
-        self.assertEqual(dammit.unicode_markup, markup)
-
-    def test_smart_quotes_to_unicode(self):
-        markup = b"<foo>\x91\x92\x93\x94</foo>"
-        dammit = UnicodeDammit(markup)
-        self.assertEqual(
-            dammit.unicode_markup, u"<foo>\u2018\u2019\u201c\u201d</foo>")
-
-    def test_smart_quotes_to_xml_entities(self):
-        markup = b"<foo>\x91\x92\x93\x94</foo>"
-        dammit = UnicodeDammit(markup, smart_quotes_to="xml")
-        self.assertEqual(
-            dammit.unicode_markup, "<foo>&#x2018;&#x2019;&#x201C;&#x201D;</foo>")
-
-    def test_smart_quotes_to_html_entities(self):
-        markup = b"<foo>\x91\x92\x93\x94</foo>"
-        dammit = UnicodeDammit(markup, smart_quotes_to="html")
-        self.assertEqual(
-            dammit.unicode_markup, "<foo>&lsquo;&rsquo;&ldquo;&rdquo;</foo>")
-
-    def test_smart_quotes_to_ascii(self):
-        markup = b"<foo>\x91\x92\x93\x94</foo>"
-        dammit = UnicodeDammit(markup, smart_quotes_to="ascii")
-        self.assertEqual(
-            dammit.unicode_markup, """<foo>''""</foo>""")
-
-    def test_detect_utf8(self):
-        utf8 = b"Sacr\xc3\xa9 bleu! \xe2\x98\x83"
-        dammit = UnicodeDammit(utf8)
-        self.assertEqual(dammit.original_encoding.lower(), 'utf-8')
-        self.assertEqual(dammit.unicode_markup, u'Sacr\xe9 bleu! \N{SNOWMAN}')
-
-
-    def test_convert_hebrew(self):
-        hebrew = b"\xed\xe5\xec\xf9"
-        dammit = UnicodeDammit(hebrew, ["iso-8859-8"])
-        self.assertEqual(dammit.original_encoding.lower(), 'iso-8859-8')
-        self.assertEqual(dammit.unicode_markup, u'\u05dd\u05d5\u05dc\u05e9')
-
-    def test_dont_see_smart_quotes_where_there_are_none(self):
-        utf_8 = b"\343\202\261\343\203\274\343\202\277\343\202\244 Watch"
-        dammit = UnicodeDammit(utf_8)
-        self.assertEqual(dammit.original_encoding.lower(), 'utf-8')
-        self.assertEqual(dammit.unicode_markup.encode("utf-8"), utf_8)
-
-    def test_ignore_inappropriate_codecs(self):
-        utf8_data = u"Räksmörgås".encode("utf-8")
-        dammit = UnicodeDammit(utf8_data, ["iso-8859-8"])
-        self.assertEqual(dammit.original_encoding.lower(), 'utf-8')
-
-    def test_ignore_invalid_codecs(self):
-        utf8_data = u"Räksmörgås".encode("utf-8")
-        for bad_encoding in ['.utf8', '...', 'utF---16.!']:
-            dammit = UnicodeDammit(utf8_data, [bad_encoding])
-            self.assertEqual(dammit.original_encoding.lower(), 'utf-8')
-
-    def test_exclude_encodings(self):
-        # This is UTF-8.
-        utf8_data = u"Räksmörgås".encode("utf-8")
-
-        # But if we exclude UTF-8 from consideration, the guess is
-        # Windows-1252.
-        dammit = UnicodeDammit(utf8_data, exclude_encodings=["utf-8"])
-        self.assertEqual(dammit.original_encoding.lower(), 'windows-1252')
-
-        # And if we exclude that, there is no valid guess at all.
-        dammit = UnicodeDammit(
-            utf8_data, exclude_encodings=["utf-8", "windows-1252"])
-        self.assertEqual(dammit.original_encoding, None)
-
-    def test_encoding_detector_replaces_junk_in_encoding_name_with_replacement_character(self):
-        detected = EncodingDetector(
-            b'<?xml version="1.0" encoding="UTF-\xdb" ?>')
-        encodings = list(detected.encodings)
-        assert u'utf-\N{REPLACEMENT CHARACTER}' in encodings
-
-    def test_detect_html5_style_meta_tag(self):
-
-        for data in (
-            b'<html><meta charset="euc-jp" /></html>',
-            b"<html><meta charset='euc-jp' /></html>",
-            b"<html><meta charset=euc-jp /></html>",
-            b"<html><meta charset=euc-jp/></html>"):
-            dammit = UnicodeDammit(data, is_html=True)
-            self.assertEqual(
-                "euc-jp", dammit.original_encoding)
-
-    def test_last_ditch_entity_replacement(self):
-        # This is a UTF-8 document that contains bytestrings
-        # completely incompatible with UTF-8 (ie. encoded with some other
-        # encoding).
-        #
-        # Since there is no consistent encoding for the document,
-        # Unicode, Dammit will eventually encode the document as UTF-8
-        # and encode the incompatible characters as REPLACEMENT
-        # CHARACTER.
-        #
-        # If chardet is installed, it will detect that the document
-        # can be converted into ISO-8859-1 without errors. This happens
-        # to be the wrong encoding, but it is a consistent encoding, so the
-        # code we're testing here won't run.
-        #
-        # So we temporarily disable chardet if it's present.
-        doc = b"""\357\273\277<?xml version="1.0" encoding="UTF-8"?>
-<html><b>\330\250\330\252\330\261</b>
-<i>\310\322\321\220\312\321\355\344</i></html>"""
-        chardet = bs4.dammit.chardet_dammit
-        logging.disable(logging.WARNING)
-        try:
-            def noop(str):
-                return None
-            bs4.dammit.chardet_dammit = noop
-            dammit = UnicodeDammit(doc)
-            self.assertEqual(True, dammit.contains_replacement_characters)
-            self.assertTrue(u"\ufffd" in dammit.unicode_markup)
-
-            soup = BeautifulSoup(doc, "html.parser")
-            self.assertTrue(soup.contains_replacement_characters)
-        finally:
-            logging.disable(logging.NOTSET)
-            bs4.dammit.chardet_dammit = chardet
-
-    def test_byte_order_mark_removed(self):
-        # A document written in UTF-16LE will have its byte order marker stripped.
-        data = b'\xff\xfe<\x00a\x00>\x00\xe1\x00\xe9\x00<\x00/\x00a\x00>\x00'
-        dammit = UnicodeDammit(data)
-        self.assertEqual(u"<a>áé</a>", dammit.unicode_markup)
-        self.assertEqual("utf-16le", dammit.original_encoding)
-
-    def test_detwingle(self):
-        # Here's a UTF8 document.
-        utf8 = (u"\N{SNOWMAN}" * 3).encode("utf8")
-
-        # Here's a Windows-1252 document.
-        windows_1252 = (
-            u"\N{LEFT DOUBLE QUOTATION MARK}Hi, I like Windows!"
-            u"\N{RIGHT DOUBLE QUOTATION MARK}").encode("windows_1252")
-
-        # Through some unholy alchemy, they've been stuck together.
-        doc = utf8 + windows_1252 + utf8
-
-        # The document can't be turned into UTF-8:
-        self.assertRaises(UnicodeDecodeError, doc.decode, "utf8")
-
-        # Unicode, Dammit thinks the whole document is Windows-1252,
-        # and decodes it into "â˜ƒâ˜ƒâ˜ƒ“Hi, I like Windows!”â˜ƒâ˜ƒâ˜ƒ"
-
-        # But if we run it through fix_embedded_windows_1252, it's fixed:
-
-        fixed = UnicodeDammit.detwingle(doc)
-        self.assertEqual(
-            u"☃☃☃“Hi, I like Windows!”☃☃☃", fixed.decode("utf8"))
-
-    def test_detwingle_ignores_multibyte_characters(self):
-        # Each of these characters has a UTF-8 representation ending
-        # in \x93. \x93 is a smart quote if interpreted as
-        # Windows-1252. But our code knows to skip over multibyte
-        # UTF-8 characters, so they'll survive the process unscathed.
-        for tricky_unicode_char in (
-            u"\N{LATIN SMALL LIGATURE OE}", # 2-byte char '\xc5\x93'
-            u"\N{LATIN SUBSCRIPT SMALL LETTER X}", # 3-byte char '\xe2\x82\x93'
-            u"\xf0\x90\x90\x93", # This is a CJK character, not sure which one.
-            ):
-            input = tricky_unicode_char.encode("utf8")
-            self.assertTrue(input.endswith(b'\x93'))
-            output = UnicodeDammit.detwingle(input)
-            self.assertEqual(output, input)
-
-    def test_find_declared_encoding(self):
-        # Test our ability to find a declared encoding inside an
-        # XML or HTML document.
-        #
-        # Even if the document comes in as Unicode, it may be
-        # interesting to know what encoding was claimed
-        # originally.
-
-        html_unicode = u'<html><head><meta charset="utf-8"></head></html>'
-        html_bytes = html_unicode.encode("ascii")
-
-        xml_unicode= u'<?xml version="1.0" encoding="ISO-8859-1" ?>'
-        xml_bytes = xml_unicode.encode("ascii")
-
-        m = EncodingDetector.find_declared_encoding
-        self.assertEquals(None, m(html_unicode, is_html=False))
-        self.assertEquals("utf-8", m(html_unicode, is_html=True))
-        self.assertEquals("utf-8", m(html_bytes, is_html=True))
-
-        self.assertEquals("iso-8859-1", m(xml_unicode))
-        self.assertEquals("iso-8859-1", m(xml_bytes))
-
-        # Normally, only the first few kilobytes of a document are checked for
-        # an encoding.
-        spacer = b' ' * 5000
-        self.assertEquals(None, m(spacer + html_bytes))
-        self.assertEquals(None, m(spacer + xml_bytes))
-
-        # But you can tell find_declared_encoding to search an entire
-        # HTML document.
-        self.assertEquals(
-            "utf-8",
-            m(spacer + html_bytes, is_html=True, search_entire_document=True)
-        )
-
-        # The XML encoding declaration has to be the very first thing
-        # in the document. We'll allow whitespace before the document
-        # starts, but nothing else.
-        self.assertEquals(
-            "iso-8859-1",
-            m(xml_bytes, search_entire_document=True)
-        )
-        self.assertEquals(
-            None, m(b'a' + xml_bytes, search_entire_document=True)
-        )
-            
 class TestNamedspacedAttribute(SoupTest):
 
     def test_name_may_be_none_or_missing(self):
         a = NamespacedAttribute("xmlns", None)
         self.assertEqual(a, "xmlns")
 
+        a = NamespacedAttribute("xmlns", "")
+        self.assertEqual(a, "xmlns")
+
         a = NamespacedAttribute("xmlns")
         self.assertEqual(a, "xmlns")
         
+    def test_namespace_may_be_none_or_missing(self):
+        a = NamespacedAttribute(None, "tag")
+        self.assertEqual(a, "tag")
+        
+        a = NamespacedAttribute("", "tag")
+        self.assertEqual(a, "tag")
+        
     def test_attribute_is_equivalent_to_colon_separated_string(self):
         a = NamespacedAttribute("a", "b")
         self.assertEqual("a:b", a)
diff -Nru beautifulsoup4-4.9.3/bs4/tests/test_tree.py beautifulsoup4-4.10.0/bs4/tests/test_tree.py
--- beautifulsoup4-4.9.3/bs4/tests/test_tree.py	2020-09-26 15:15:48.000000000 +0000
+++ beautifulsoup4-4.10.0/bs4/tests/test_tree.py	2021-09-07 23:36:47.000000000 +0000
@@ -75,13 +75,13 @@
         self.assertEqual(soup.find("b").string, "2")
 
     def test_unicode_text_find(self):
-        soup = self.soup(u'<h1>Räksmörgås</h1>')
-        self.assertEqual(soup.find(string=u'Räksmörgås'), u'Räksmörgås')
+        soup = self.soup('<h1>Räksmörgås</h1>')
+        self.assertEqual(soup.find(string='Räksmörgås'), 'Räksmörgås')
 
     def test_unicode_attribute_find(self):
-        soup = self.soup(u'<h1 id="Räksmörgås">here it is</h1>')
+        soup = self.soup('<h1 id="Räksmörgås">here it is</h1>')
         str(soup)
-        self.assertEqual("here it is", soup.find(id=u'Räksmörgås').text)
+        self.assertEqual("here it is", soup.find(id='Räksmörgås').text)
 
 
     def test_find_everything(self):
@@ -101,17 +101,17 @@
         """You can search the tree for text nodes."""
         soup = self.soup("<html>Foo<b>bar</b>\xbb</html>")
         # Exact match.
-        self.assertEqual(soup.find_all(string="bar"), [u"bar"])
-        self.assertEqual(soup.find_all(text="bar"), [u"bar"])
+        self.assertEqual(soup.find_all(string="bar"), ["bar"])
+        self.assertEqual(soup.find_all(text="bar"), ["bar"])
         # Match any of a number of strings.
         self.assertEqual(
-            soup.find_all(text=["Foo", "bar"]), [u"Foo", u"bar"])
+            soup.find_all(text=["Foo", "bar"]), ["Foo", "bar"])
         # Match a regular expression.
         self.assertEqual(soup.find_all(text=re.compile('.*')),
-                         [u"Foo", u"bar", u'\xbb'])
+                         ["Foo", "bar", '\xbb'])
         # Match anything.
         self.assertEqual(soup.find_all(text=True),
-                         [u"Foo", u"bar", u'\xbb'])
+                         ["Foo", "bar", '\xbb'])
 
     def test_find_all_limit(self):
         """You can limit the number of items returned by find_all."""
@@ -254,8 +254,8 @@
                            ["Matching a.", "Matching b."])
 
     def test_find_all_by_utf8_attribute_value(self):
-        peace = u"םולש".encode("utf8")
-        data = u'<a title="םולש"></a>'.encode("utf8")
+        peace = "םולש".encode("utf8")
+        data = '<a title="םולש"></a>'.encode("utf8")
         soup = self.soup(data)
         self.assertEqual([soup.a], soup.find_all(title=peace))
         self.assertEqual([soup.a], soup.find_all(title=peace.decode("utf8")))
@@ -444,7 +444,7 @@
         # output.
 
         # Since the <span> tag has two children, its .string is None.
-        self.assertEquals(None, div.span.string)
+        self.assertEqual(None, div.span.string)
 
         self.assertEqual(7, len(div.contents))
         div.smooth()
@@ -755,18 +755,18 @@
 
         # No list of whitespace-preserving tags -> pretty-print
         tag._preserve_whitespace_tags = None
-        self.assertEquals(True, tag._should_pretty_print(0))
+        self.assertEqual(True, tag._should_pretty_print(0))
 
         # List exists but tag is not on the list -> pretty-print
         tag.preserve_whitespace_tags = ["some_other_tag"]
-        self.assertEquals(True, tag._should_pretty_print(1))
+        self.assertEqual(True, tag._should_pretty_print(1))
 
         # Indent level is None -> don't pretty-print
-        self.assertEquals(False, tag._should_pretty_print(None))
+        self.assertEqual(False, tag._should_pretty_print(None))
         
         # Tag is on the whitespace-preserving list -> don't pretty-print
         tag.preserve_whitespace_tags = ["some_other_tag", "a_tag"]
-        self.assertEquals(False, tag._should_pretty_print(1))
+        self.assertEqual(False, tag._should_pretty_print(1))
 
         
 class TestTagCreation(SoupTest):
@@ -905,10 +905,10 @@
             assert not isinstance(i, BeautifulSoup)
         
         p1, p2, p3, p4 = list(soup.children)
-        self.assertEquals("And now, a word:", p1.string)
-        self.assertEquals("p2", p2.string)
-        self.assertEquals("p3", p3.string)
-        self.assertEquals("And we're back.", p4.string)
+        self.assertEqual("And now, a word:", p1.string)
+        self.assertEqual("p2", p2.string)
+        self.assertEqual("p3", p3.string)
+        self.assertEqual("And we're back.", p4.string)
         
         
     def test_replace_with_maintains_next_element_throughout(self):
@@ -1015,8 +1015,8 @@
         d1 = soup.find('div', id='d1')
         d2 = soup.find('div', id='d2')
         d2.extend(d1)
-        self.assertEqual(u'<div id="d1"></div>', d1.decode())
-        self.assertEqual(u'<div id="d2"><a>1</a><a>2</a><a>3</a><a>4</a></div>', d2.decode())
+        self.assertEqual('<div id="d1"></div>', d1.decode())
+        self.assertEqual('<div id="d2"><a>1</a><a>2</a><a>3</a><a>4</a></div>', d2.decode())
         
     def test_move_tag_to_beginning_of_parent(self):
         data = "<a><b></b><c></c><d></d></a>"
@@ -1130,6 +1130,37 @@
         self.assertEqual(no.next_element, "no")
         self.assertEqual(no.next_sibling, " business")
 
+    def test_replace_with_errors(self):
+        # Can't replace a tag that's not part of a tree.
+        a_tag = Tag(name="a")
+        self.assertRaises(ValueError, a_tag.replace_with, "won't work")
+
+        # Can't replace a tag with its parent.
+        a_tag = self.soup("<a><b></b></a>").a
+        self.assertRaises(ValueError, a_tag.b.replace_with, a_tag)
+
+        # Or with a list that includes its parent.
+        self.assertRaises(ValueError, a_tag.b.replace_with,
+                          "string1", a_tag, "string2")
+        
+    def test_replace_with_multiple(self):
+        data = "<a><b></b><c></c></a>"
+        soup = self.soup(data)
+        d_tag = soup.new_tag("d")
+        d_tag.string = "Text In D Tag"
+        e_tag = soup.new_tag("e")
+        f_tag = soup.new_tag("f")
+        a_string = "Random Text"
+        soup.c.replace_with(d_tag, e_tag, a_string, f_tag)
+        self.assertEqual(
+            "<a><b></b><d>Text In D Tag</d><e></e>Random Text<f></f></a>",
+            soup.decode()
+        )
+        assert soup.b.next_element == d_tag
+        assert d_tag.string.next_element==e_tag
+        assert e_tag.next_element.string == a_string
+        assert e_tag.next_element.next_element == f_tag
+        
     def test_replace_first_child(self):
         data = "<a><b></b><c></c></a>"
         soup = self.soup(data)
@@ -1262,7 +1293,7 @@
 <script>baz</script>
 </html>""")
         [soup.script.extract() for i in soup.find_all("script")]
-        self.assertEqual("<body>\n\n<a></a>\n</body>", unicode(soup.body))
+        self.assertEqual("<body>\n\n<a></a>\n</body>", str(soup.body))
 
 
     def test_extract_works_when_element_is_surrounded_by_identical_strings(self):
@@ -1440,6 +1471,40 @@
         soup = self.soup("foo<style>CSS</style><script>Javascript</script>bar")
         self.assertEqual(['foo', 'bar'], list(soup.strings))
 
+    def test_string_methods_inside_special_string_container_tags(self):
+        # Strings inside tags like <script> are generally ignored by
+        # methods like get_text, because they're not what humans
+        # consider 'text'. But if you call get_text on the <script>
+        # tag itself, those strings _are_ considered to be 'text',
+        # because there's nothing else you might be looking for.
+        
+        style = self.soup("<div>a<style>Some CSS</style></div>")
+        template = self.soup("<div>a<template><p>Templated <b>text</b>.</p><!--With a comment.--></template></div>")
+        script = self.soup("<div>a<script><!--a comment-->Some text</script></div>")
+        
+        self.assertEqual(style.div.get_text(), "a")
+        self.assertEqual(list(style.div.strings), ["a"])
+        self.assertEqual(style.div.style.get_text(), "Some CSS")
+        self.assertEqual(list(style.div.style.strings),
+                         ['Some CSS'])
+        
+        # The comment is not picked up here. That's because it was
+        # parsed into a Comment object, which is not considered
+        # interesting by template.strings.
+        self.assertEqual(template.div.get_text(), "a")
+        self.assertEqual(list(template.div.strings), ["a"])
+        self.assertEqual(template.div.template.get_text(), "Templated text.")
+        self.assertEqual(list(template.div.template.strings),
+                         ["Templated ", "text", "."])
+
+        # The comment is included here, because it didn't get parsed
+        # into a Comment object--it's part of the Script string.
+        self.assertEqual(script.div.get_text(), "a")
+        self.assertEqual(list(script.div.strings), ["a"])
+        self.assertEqual(script.div.script.get_text(),
+                         "<!--a comment-->Some text")
+        self.assertEqual(list(script.div.script.strings),
+                         ['<!--a comment-->Some text'])
 
 class TestCDAtaListAttributes(SoupTest):
 
@@ -1524,7 +1589,7 @@
         soup = BeautifulSoup(b'<p>&nbsp;</p>', 'html.parser')
         encoding = soup.original_encoding
         copy = soup.__copy__()
-        self.assertEqual(u"<p> </p>", unicode(copy))
+        self.assertEqual("<p> </p>", str(copy))
         self.assertEqual(encoding, copy.original_encoding)
 
     def test_copy_preserves_builder_information(self):
@@ -1554,14 +1619,14 @@
         
     def test_unicode_pickle(self):
         # A tree containing Unicode characters can be pickled.
-        html = u"<b>\N{SNOWMAN}</b>"
+        html = "<b>\N{SNOWMAN}</b>"
         soup = self.soup(html)
         dumped = pickle.dumps(soup, pickle.HIGHEST_PROTOCOL)
         loaded = pickle.loads(dumped)
         self.assertEqual(loaded.decode(), soup.decode())
 
     def test_copy_navigablestring_is_not_attached_to_tree(self):
-        html = u"<b>Foo<a></a></b><b>Bar</b>"
+        html = "<b>Foo<a></a></b><b>Bar</b>"
         soup = self.soup(html)
         s1 = soup.find(string="Foo")
         s2 = copy.copy(s1)
@@ -1573,7 +1638,7 @@
         self.assertEqual(None, s2.previous_element)
 
     def test_copy_navigablestring_subclass_has_same_type(self):
-        html = u"<b><!--Foo--></b>"
+        html = "<b><!--Foo--></b>"
         soup = self.soup(html)
         s1 = soup.string
         s2 = copy.copy(s1)
@@ -1581,19 +1646,19 @@
         self.assertTrue(isinstance(s2, Comment))
 
     def test_copy_entire_soup(self):
-        html = u"<div><b>Foo<a></a></b><b>Bar</b></div>end"
+        html = "<div><b>Foo<a></a></b><b>Bar</b></div>end"
         soup = self.soup(html)
         soup_copy = copy.copy(soup)
         self.assertEqual(soup, soup_copy)
 
     def test_copy_tag_copies_contents(self):
-        html = u"<div><b>Foo<a></a></b><b>Bar</b></div>end"
+        html = "<div><b>Foo<a></a></b><b>Bar</b></div>end"
         soup = self.soup(html)
         div = soup.div
         div_copy = copy.copy(div)
 
         # The two tags look the same, and evaluate to equal.
-        self.assertEqual(unicode(div), unicode(div_copy))
+        self.assertEqual(str(div), str(div_copy))
         self.assertEqual(div, div_copy)
 
         # But they're not the same object.
@@ -1609,17 +1674,17 @@
 class TestSubstitutions(SoupTest):
 
     def test_default_formatter_is_minimal(self):
-        markup = u"<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
+        markup = "<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
         soup = self.soup(markup)
         decoded = soup.decode(formatter="minimal")
         # The < is converted back into &lt; but the e-with-acute is left alone.
         self.assertEqual(
             decoded,
             self.document_for(
-                u"<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"))
+                "<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"))
 
     def test_formatter_html(self):
-        markup = u"<br><b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
+        markup = "<br><b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
         soup = self.soup(markup)
         decoded = soup.decode(formatter="html")
         self.assertEqual(
@@ -1627,7 +1692,7 @@
             self.document_for("<br/><b>&lt;&lt;Sacr&eacute; bleu!&gt;&gt;</b>"))
 
     def test_formatter_html5(self):
-        markup = u"<br><b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
+        markup = "<br><b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
         soup = self.soup(markup)
         decoded = soup.decode(formatter="html5")
         self.assertEqual(
@@ -1635,49 +1700,49 @@
             self.document_for("<br><b>&lt;&lt;Sacr&eacute; bleu!&gt;&gt;</b>"))
         
     def test_formatter_minimal(self):
-        markup = u"<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
+        markup = "<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
         soup = self.soup(markup)
         decoded = soup.decode(formatter="minimal")
         # The < is converted back into &lt; but the e-with-acute is left alone.
         self.assertEqual(
             decoded,
             self.document_for(
-                u"<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"))
+                "<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"))
 
     def test_formatter_null(self):
-        markup = u"<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
+        markup = "<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
         soup = self.soup(markup)
         decoded = soup.decode(formatter=None)
         # Neither the angle brackets nor the e-with-acute are converted.
         # This is not valid HTML, but it's what the user wanted.
         self.assertEqual(decoded,
-                          self.document_for(u"<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>"))
+                          self.document_for("<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>"))
 
     def test_formatter_custom(self):
-        markup = u"<b>&lt;foo&gt;</b><b>bar</b><br/>"
+        markup = "<b>&lt;foo&gt;</b><b>bar</b><br/>"
         soup = self.soup(markup)
         decoded = soup.decode(formatter = lambda x: x.upper())
         # Instead of normal entity conversion code, the custom
         # callable is called on every string.
         self.assertEqual(
             decoded,
-            self.document_for(u"<b><FOO></b><b>BAR</b><br/>"))
+            self.document_for("<b><FOO></b><b>BAR</b><br/>"))
 
     def test_formatter_is_run_on_attribute_values(self):
-        markup = u'<a href="http://a.com?a=b&c=é">e</a>'
+        markup = '<a href="http://a.com?a=b&c=é">e</a>'
         soup = self.soup(markup)
         a = soup.a
 
-        expect_minimal = u'<a href="http://a.com?a=b&amp;c=é">e</a>'
+        expect_minimal = '<a href="http://a.com?a=b&amp;c=é">e</a>'
 
         self.assertEqual(expect_minimal, a.decode())
         self.assertEqual(expect_minimal, a.decode(formatter="minimal"))
 
-        expect_html = u'<a href="http://a.com?a=b&amp;c=&eacute;">e</a>'
+        expect_html = '<a href="http://a.com?a=b&amp;c=&eacute;">e</a>'
         self.assertEqual(expect_html, a.decode(formatter="html"))
 
         self.assertEqual(markup, a.decode(formatter=None))
-        expect_upper = u'<a href="HTTP://A.COM?A=B&C=É">E</a>'
+        expect_upper = '<a href="HTTP://A.COM?A=B&C=É">E</a>'
         self.assertEqual(expect_upper, a.decode(formatter=lambda x: x.upper()))
 
     def test_formatter_skips_script_tag_for_html_documents(self):
@@ -1703,7 +1768,7 @@
         # Everything outside the <pre> tag is reformatted, but everything
         # inside is left alone.
         self.assertEqual(
-            u'<div>\n foo\n <pre>  \tbar\n  \n  </pre>\n baz\n <textarea> eee\nfff\t</textarea>\n</div>',
+            '<div>\n foo\n <pre>  \tbar\n  \n  </pre>\n baz\n <textarea> eee\nfff\t</textarea>\n</div>',
             soup.div.prettify())
 
     def test_prettify_accepts_formatter_function(self):
@@ -1713,14 +1778,14 @@
 
     def test_prettify_outputs_unicode_by_default(self):
         soup = self.soup("<a></a>")
-        self.assertEqual(unicode, type(soup.prettify()))
+        self.assertEqual(str, type(soup.prettify()))
 
     def test_prettify_can_encode_data(self):
         soup = self.soup("<a></a>")
         self.assertEqual(bytes, type(soup.prettify("utf-8")))
 
     def test_html_entity_substitution_off_by_default(self):
-        markup = u"<b>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</b>"
+        markup = "<b>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</b>"
         soup = self.soup(markup)
         encoded = soup.b.encode("utf-8")
         self.assertEqual(encoded, markup.encode('utf-8'))
@@ -1764,161 +1829,55 @@
     """Test the ability to encode objects into strings."""
 
     def test_unicode_string_can_be_encoded(self):
-        html = u"<b>\N{SNOWMAN}</b>"
+        html = "<b>\N{SNOWMAN}</b>"
         soup = self.soup(html)
         self.assertEqual(soup.b.string.encode("utf-8"),
-                          u"\N{SNOWMAN}".encode("utf-8"))
+                          "\N{SNOWMAN}".encode("utf-8"))
 
     def test_tag_containing_unicode_string_can_be_encoded(self):
-        html = u"<b>\N{SNOWMAN}</b>"
+        html = "<b>\N{SNOWMAN}</b>"
         soup = self.soup(html)
         self.assertEqual(
             soup.b.encode("utf-8"), html.encode("utf-8"))
 
     def test_encoding_substitutes_unrecognized_characters_by_default(self):
-        html = u"<b>\N{SNOWMAN}</b>"
+        html = "<b>\N{SNOWMAN}</b>"
         soup = self.soup(html)
         self.assertEqual(soup.b.encode("ascii"), b"<b>&#9731;</b>")
 
     def test_encoding_can_be_made_strict(self):
-        html = u"<b>\N{SNOWMAN}</b>"
+        html = "<b>\N{SNOWMAN}</b>"
         soup = self.soup(html)
         self.assertRaises(
             UnicodeEncodeError, soup.encode, "ascii", errors="strict")
 
     def test_decode_contents(self):
-        html = u"<b>\N{SNOWMAN}</b>"
+        html = "<b>\N{SNOWMAN}</b>"
         soup = self.soup(html)
-        self.assertEqual(u"\N{SNOWMAN}", soup.b.decode_contents())
+        self.assertEqual("\N{SNOWMAN}", soup.b.decode_contents())
 
     def test_encode_contents(self):
-        html = u"<b>\N{SNOWMAN}</b>"
+        html = "<b>\N{SNOWMAN}</b>"
         soup = self.soup(html)
         self.assertEqual(
-            u"\N{SNOWMAN}".encode("utf8"), soup.b.encode_contents(
+            "\N{SNOWMAN}".encode("utf8"), soup.b.encode_contents(
                 encoding="utf8"))
 
     def test_deprecated_renderContents(self):
-        html = u"<b>\N{SNOWMAN}</b>"
+        html = "<b>\N{SNOWMAN}</b>"
         soup = self.soup(html)
         self.assertEqual(
-            u"\N{SNOWMAN}".encode("utf8"), soup.b.renderContents())
+            "\N{SNOWMAN}".encode("utf8"), soup.b.renderContents())
 
     def test_repr(self):
-        html = u"<b>\N{SNOWMAN}</b>"
+        html = "<b>\N{SNOWMAN}</b>"
         soup = self.soup(html)
         if PY3K:
             self.assertEqual(html, repr(soup))
         else:
             self.assertEqual(b'<b>\\u2603</b>', repr(soup))
 
-class TestFormatter(SoupTest):
-
-    def test_default_attributes(self):
-        # Test the default behavior of Formatter.attributes().
-        formatter = Formatter()
-        tag = Tag(name="tag")
-        tag['b'] = 1
-        tag['a'] = 2
-
-        # Attributes come out sorted by name. In Python 3, attributes
-        # normally come out of a dictionary in the order they were
-        # added.
-        self.assertEquals([('a', 2), ('b', 1)], formatter.attributes(tag))
-
-        # This works even if Tag.attrs is None, though this shouldn't
-        # normally happen.
-        tag.attrs = None
-        self.assertEquals([], formatter.attributes(tag))
-        
-    def test_sort_attributes(self):
-        # Test the ability to override Formatter.attributes() to,
-        # e.g., disable the normal sorting of attributes.
-        class UnsortedFormatter(Formatter):
-            def attributes(self, tag):
-                self.called_with = tag
-                for k, v in sorted(tag.attrs.items()):
-                    if k == 'ignore':
-                        continue
-                    yield k,v
-
-        soup = self.soup('<p cval="1" aval="2" ignore="ignored"></p>')
-        formatter = UnsortedFormatter()
-        decoded = soup.decode(formatter=formatter)
-
-        # attributes() was called on the <p> tag. It filtered out one
-        # attribute and sorted the other two.
-        self.assertEquals(formatter.called_with, soup.p)
-        self.assertEquals(u'<p aval="2" cval="1"></p>', decoded)
-
-
-class TestNavigableStringSubclasses(SoupTest):
-
-    def test_cdata(self):
-        # None of the current builders turn CDATA sections into CData
-        # objects, but you can create them manually.
-        soup = self.soup("")
-        cdata = CData("foo")
-        soup.insert(1, cdata)
-        self.assertEqual(str(soup), "<![CDATA[foo]]>")
-        self.assertEqual(soup.find(text="foo"), "foo")
-        self.assertEqual(soup.contents[0], "foo")
-
-    def test_cdata_is_never_formatted(self):
-        """Text inside a CData object is passed into the formatter.
-
-        But the return value is ignored.
-        """
-
-        self.count = 0
-        def increment(*args):
-            self.count += 1
-            return "BITTER FAILURE"
-
-        soup = self.soup("")
-        cdata = CData("<><><>")
-        soup.insert(1, cdata)
-        self.assertEqual(
-            b"<![CDATA[<><><>]]>", soup.encode(formatter=increment))
-        self.assertEqual(1, self.count)
-
-    def test_doctype_ends_in_newline(self):
-        # Unlike other NavigableString subclasses, a DOCTYPE always ends
-        # in a newline.
-        doctype = Doctype("foo")
-        soup = self.soup("")
-        soup.insert(1, doctype)
-        self.assertEqual(soup.encode(), b"<!DOCTYPE foo>\n")
-
-    def test_declaration(self):
-        d = Declaration("foo")
-        self.assertEqual("<?foo?>", d.output_ready())
-
-    def test_default_string_containers(self):
-        # In some cases, we use different NavigableString subclasses for
-        # the same text in different tags.
-        soup = self.soup(
-            "<div>text</div><script>text</script><style>text</style>"
-        )
-        self.assertEqual(
-            [NavigableString, Script, Stylesheet],
-            [x.__class__ for x in soup.find_all(text=True)]
-        )
-
-        # The TemplateString is a little unusual because it's generally found
-        # _inside_ children of a <template> element, not a direct child of the
-        # <template> element.
-        soup = self.soup(
-            "<template>Some text<p>In a tag</p></template>Some text outside"
-        )
-        assert all(isinstance(x, TemplateString) for x in soup.template.strings)
-
-        # Once the <template> tag closed, we went back to using
-        # NavigableString.
-        outside = soup.template.next_sibling
-        assert isinstance(outside, NavigableString)
-        assert not isinstance(outside, TemplateString)
-
+        
 class TestSoupSelector(TreeTest):
 
     HTML = """
@@ -1993,7 +1952,7 @@
         els = self.soup.select('title')
         self.assertEqual(len(els), 1)
         self.assertEqual(els[0].name, 'title')
-        self.assertEqual(els[0].contents, [u'The title'])
+        self.assertEqual(els[0].contents, ['The title'])
 
     def test_one_tag_many(self):
         els = self.soup.select('div')
@@ -2039,7 +1998,7 @@
         self.assertEqual(dashed[0]['id'], 'dash2')
 
     def test_dashed_tag_text(self):
-        self.assertEqual(self.soup.select('body > custom-dashed-tag')[0].text, u'Hello there.')
+        self.assertEqual(self.soup.select('body > custom-dashed-tag')[0].text, 'Hello there.')
 
     def test_select_dashed_matches_find_all(self):
         self.assertEqual(self.soup.select('custom-dashed-tag'), self.soup.find_all('custom-dashed-tag'))
@@ -2225,12 +2184,12 @@
         # Try to select first paragraph
         els = self.soup.select('div#inner p:nth-of-type(1)')
         self.assertEqual(len(els), 1)
-        self.assertEqual(els[0].string, u'Some text')
+        self.assertEqual(els[0].string, 'Some text')
 
         # Try to select third paragraph
         els = self.soup.select('div#inner p:nth-of-type(3)')
         self.assertEqual(len(els), 1)
-        self.assertEqual(els[0].string, u'Another')
+        self.assertEqual(els[0].string, 'Another')
 
         # Try to select (non-existent!) fourth paragraph
         els = self.soup.select('div#inner p:nth-of-type(4)')
@@ -2243,7 +2202,7 @@
     def test_nth_of_type_direct_descendant(self):
         els = self.soup.select('div#inner > p:nth-of-type(1)')
         self.assertEqual(len(els), 1)
-        self.assertEqual(els[0].string, u'Some text')
+        self.assertEqual(els[0].string, 'Some text')
 
     def test_id_child_selector_nth_of_type(self):
         self.assertSelects('#inner > p:nth-of-type(2)', ['p1'])
@@ -2324,7 +2283,7 @@
         markup = '<div class="c1"/><div class="c2"/><div class="c1"/>'
         soup = BeautifulSoup(markup, 'html.parser')
         selected = soup.select(".c1, .c2")
-        self.assertEquals(3, len(selected))
+        self.assertEqual(3, len(selected))
 
         # Verify that find_all finds the same elements, though because
         # of an implementation detail it finds them in a different
diff -Nru beautifulsoup4-4.9.3/convert-py3k beautifulsoup4-4.10.0/convert-py3k
--- beautifulsoup4-4.9.3/convert-py3k	2013-08-12 15:47:12.000000000 +0000
+++ beautifulsoup4-4.10.0/convert-py3k	1970-01-01 00:00:00.000000000 +0000
@@ -1,16 +0,0 @@
-#!/bin/sh
-#
-# The Python 2 source is the definitive source. This script uses 2to3-3.2 to
-# create a new python3/bs4 source tree that works under Python 3.
-#
-# See README.txt to see how to run the test suite after conversion.
-echo "About to destroy and rebuild the py3k/bs4 directory."
-echo "If you've got stuff in there, Ctrl-C out of this script or answer 'n'."
-mkdir -p py3k
-rm -rfI py3k/bs4
-cp -r bs4/ py3k/
-2to3 -w py3k
-echo ""
-echo "OK, conversion is done."
-echo "Now running the unit tests."
-(cd py3k && python3 -m unittest discover -s bs4)
\ No newline at end of file
diff -Nru beautifulsoup4-4.9.3/debian/changelog beautifulsoup4-4.10.0/debian/changelog
--- beautifulsoup4-4.9.3/debian/changelog	2020-10-08 03:53:02.000000000 +0000
+++ beautifulsoup4-4.10.0/debian/changelog	2021-10-24 18:39:39.000000000 +0000
@@ -1,3 +1,23 @@
+beautifulsoup4 (4.10.0-2) unstable; urgency=medium
+
+  * Replace nose with pytest (see: #997758).
+
+ -- Stefano Rivera <stefanor@debian.org>  Sun, 24 Oct 2021 11:39:39 -0700
+
+beautifulsoup4 (4.10.0-1) unstable; urgency=medium
+
+  * New upstream release. (Closes: #997146)
+  * Add upstream metadata.
+  * Switch watch file to PyPI, the author's home page is currently out of
+    date.
+  * Bump copyright years.
+  * Adjust autopkgtest to handle bs4.tests not being shipped in the installed
+    package.
+  * Bump Standards-Version to 4.6.0, no changes needed.
+  * Bump debhelper compat level to 13.
+
+ -- Stefano Rivera <stefanor@debian.org>  Sat, 23 Oct 2021 15:04:48 -0700
+
 beautifulsoup4 (4.9.3-1) unstable; urgency=medium
 
   * New upstream release.
diff -Nru beautifulsoup4-4.9.3/debian/control beautifulsoup4-4.10.0/debian/control
--- beautifulsoup4-4.9.3/debian/control	2020-10-08 03:53:02.000000000 +0000
+++ beautifulsoup4-4.10.0/debian/control	2021-10-24 18:39:39.000000000 +0000
@@ -4,7 +4,7 @@
 Maintainer: Debian Python Team <team+python@tracker.debian.org>
 Uploaders: Stefano Rivera <stefanor@debian.org>
 Build-Depends:
- debhelper-compat (= 12),
+ debhelper-compat (= 13),
  dh-python,
  python3-all (>= 3.1.2),
  python3-html5lib <!nocheck>,
@@ -13,7 +13,7 @@
  python3-soupsieve <!nocheck>,
  python3-sphinx,
  python3-stemmer
-Standards-Version: 4.5.0
+Standards-Version: 4.6.0
 Homepage: https://www.crummy.com/software/BeautifulSoup
 Vcs-Git: https://salsa.debian.org/python-team/packages/beautifulsoup4.git
 Vcs-Browser: https://salsa.debian.org/python-team/packages/beautifulsoup4
diff -Nru beautifulsoup4-4.9.3/debian/copyright beautifulsoup4-4.10.0/debian/copyright
--- beautifulsoup4-4.9.3/debian/copyright	2020-10-08 03:53:02.000000000 +0000
+++ beautifulsoup4-4.10.0/debian/copyright	2021-10-24 18:39:39.000000000 +0000
@@ -4,7 +4,7 @@
 Source: https://launchpad.net/beautifulsoup
 
 Files: *
-Copyright: 2004-2020, Leonard Richardsonn
+Copyright: 2004-2021, Leonard Richardson
 License: Expat
 Comment:
  Beautiful Soup incorporates code from the html5lib library, which is also made
@@ -18,7 +18,7 @@
 Files: debian/*
 Copyright:
  2005-2009, Decklin Foster <decklin@red-bean.com>
- 2011-2020, Stefano Rivera <stefanor@debian.org>
+ 2011-2021, Stefano Rivera <stefanor@debian.org>
 License: Expat
 
 License: Expat
diff -Nru beautifulsoup4-4.9.3/debian/tests/control beautifulsoup4-4.10.0/debian/tests/control
--- beautifulsoup4-4.9.3/debian/tests/control	2020-10-08 03:53:02.000000000 +0000
+++ beautifulsoup4-4.10.0/debian/tests/control	2021-10-24 18:39:39.000000000 +0000
@@ -3,5 +3,5 @@
  python3-all,
  python3-bs4,
  python3-html5lib,
- python3-lxml,
- python3-nose (>= 1.3)
+ python3-pytest,
+ python3-lxml
diff -Nru beautifulsoup4-4.9.3/debian/tests/unittests3 beautifulsoup4-4.10.0/debian/tests/unittests3
--- beautifulsoup4-4.9.3/debian/tests/unittests3	2020-10-08 03:53:02.000000000 +0000
+++ beautifulsoup4-4.10.0/debian/tests/unittests3	2021-10-24 18:39:39.000000000 +0000
@@ -3,9 +3,15 @@
 
 pys="$(py3versions -r 2> /dev/null)"
 
+cp -a bs4/tests "$AUTOPKGTEST_TMP"
+cat > "$AUTOPKGTEST_TMP/tests/__init__.py" <<EOF
+import sys
+sys.modules['bs4.tests'] = sys.modules[__name__]
+EOF
+
 cd "$AUTOPKGTEST_TMP"
 
 for py in $pys; do
 	echo "=== $py ==="
-	$py -m nose bs4.tests 2>&1
+	$py -m pytest tests
 done
diff -Nru beautifulsoup4-4.9.3/debian/upstream/metadata beautifulsoup4-4.10.0/debian/upstream/metadata
--- beautifulsoup4-4.9.3/debian/upstream/metadata	1970-01-01 00:00:00.000000000 +0000
+++ beautifulsoup4-4.10.0/debian/upstream/metadata	2021-10-24 18:39:39.000000000 +0000
@@ -0,0 +1,10 @@
+---
+Bug-Database: https://bugs.launchpad.net/beautifulsoup/
+Bug-Submit: https://bugs.launchpad.net/beautifulsoup/+filebug
+Changelog: https://bazaar.launchpad.net/~leonardr/beautifulsoup/bs4/view/head:/CHANGELOG
+Documentation: https://www.crummy.com/software/BeautifulSoup/bs4/doc/
+Registry:
+  - Name: PyPI
+    Entry: beautifulsoup4
+Repository-Browse: https://bazaar.launchpad.net/~leonardr/beautifulsoup/bs4/files
+Repository: https://code.launchpad.net/~leonardr/beautifulsoup/bs4
diff -Nru beautifulsoup4-4.9.3/debian/watch beautifulsoup4-4.10.0/debian/watch
--- beautifulsoup4-4.9.3/debian/watch	2020-10-08 03:53:02.000000000 +0000
+++ beautifulsoup4-4.10.0/debian/watch	2021-10-24 18:39:39.000000000 +0000
@@ -1,3 +1,3 @@
-version=3
-opts=uversionmangle=s/([ab]\d+)/~$1/ \
-https://www.crummy.com/software/BeautifulSoup/bs4/download/(\d\.\d)/beautifulsoup4-([\d.abrc]+)\.tar\.gz
+version=4
+opts=uversionmangle=s/(rc|a|b|c)/~$1/ \
+https://pypi.debian.net/beautifulsoup4/beautifulsoup4-(.+)\.(?:zip|tgz|tbz|txz|(?:tar\.(?:gz|bz2|xz)))
diff -Nru beautifulsoup4-4.9.3/doc/source/index.rst beautifulsoup4-4.10.0/doc/source/index.rst
--- beautifulsoup4-4.9.3/doc/source/index.rst	2020-09-26 15:54:51.000000000 +0000
+++ beautifulsoup4-4.10.0/doc/source/index.rst	2021-09-07 23:49:55.000000000 +0000
@@ -18,14 +18,13 @@
 how to use it, how to make it do what you want, and what to do when it
 violates your expectations.
 
-This document covers Beautiful Soup version 4.9.2. The examples in
-this documentation should work the same way in Python 2.7 and Python
-3.8.
+This document covers Beautiful Soup version 4.10.0. The examples in
+this documentation were written for Python 3.8.
 
 You might be looking for the documentation for `Beautiful Soup 3
 <http://www.crummy.com/software/BeautifulSoup/bs3/documentation.html>`_.
 If so, you should know that Beautiful Soup 3 is no longer being
-developed and that support for it will be dropped on or after December
+developed and that all support for it was dropped on December
 31, 2020. If you want to learn about the differences between Beautiful
 Soup 3 and Beautiful Soup 4, see `Porting code to BS4`_.
 
@@ -170,16 +169,13 @@
 If you're using a recent version of Debian or Ubuntu Linux, you can
 install Beautiful Soup with the system package manager:
 
-:kbd:`$ apt-get install python-bs4` (for Python 2)
-
-:kbd:`$ apt-get install python3-bs4` (for Python 3)
+:kbd:`$ apt-get install python3-bs4`
 
 Beautiful Soup 4 is published through PyPi, so if you can't install it
 with the system packager, you can install it with ``easy_install`` or
-``pip``. The package name is ``beautifulsoup4``, and the same package
-works on Python 2 and Python 3. Make sure you use the right version of
-``pip`` or ``easy_install`` for your Python version (these may be named
-``pip3`` and ``easy_install3`` respectively if you're using Python 3).
+``pip``. The package name is ``beautifulsoup4``. Make sure you use the
+right version of ``pip`` or ``easy_install`` for your Python version
+(these may be named ``pip3`` and ``easy_install3`` respectively).
 
 :kbd:`$ easy_install beautifulsoup4`
 
@@ -202,40 +198,8 @@
 tarball, copy its ``bs4`` directory into your application's codebase,
 and use Beautiful Soup without installing it at all.
 
-I use Python 2.7 and Python 3.8 to develop Beautiful Soup, but it
-should work with other recent versions.
-
-Problems after installation
----------------------------
-
-Beautiful Soup is packaged as Python 2 code. When you install it for
-use with Python 3, it's automatically converted to Python 3 code. If
-you don't install the package, the code won't be converted. There have
-also been reports on Windows machines of the wrong version being
-installed.
-
-If you get the ``ImportError`` "No module named HTMLParser", your
-problem is that you're running the Python 2 version of the code under
-Python 3.
-
-If you get the ``ImportError`` "No module named html.parser", your
-problem is that you're running the Python 3 version of the code under
-Python 2.
-
-In both cases, your best bet is to completely remove the Beautiful
-Soup installation from your system (including any directory created
-when you unzipped the tarball) and try the installation again.
-
-If you get the ``SyntaxError`` "Invalid syntax" on the line
-``ROOT_TAG_NAME = u'[document]'``, you need to convert the Python 2
-code to Python 3. You can do this either by installing the package:
-
-:kbd:`$ python3 setup.py install`
-
-or by manually running Python's ``2to3`` conversion script on the
-``bs4`` directory:
-
-:kbd:`$ 2to3-3.2 -w bs4`
+I use Python 3.8 to develop Beautiful Soup, but it should work with
+other recent versions.
 
 .. _parser-installation:
 
@@ -272,8 +236,7 @@
 +----------------------+--------------------------------------------+--------------------------------+--------------------------+
 | Python's html.parser | ``BeautifulSoup(markup, "html.parser")``   | * Batteries included           | * Not as fast as lxml,   |
 |                      |                                            | * Decent speed                 |   less lenient than      |
-|                      |                                            | * Lenient (As of Python 2.7.3  |   html5lib.              |
-|                      |                                            |   and 3.2.)                    |                          |
+|                      |                                            | * Lenient (As of Python 3.2)   |   html5lib.              |
 +----------------------+--------------------------------------------+--------------------------------+--------------------------+
 | lxml's HTML parser   | ``BeautifulSoup(markup, "lxml")``          | * Very fast                    | * External C dependency  |
 |                      |                                            | * Lenient                      |                          |
@@ -289,9 +252,9 @@
 +----------------------+--------------------------------------------+--------------------------------+--------------------------+
 
 If you can, I recommend you install and use lxml for speed. If you're
-using a very old version of Python -- earlier than 2.7.3 or 3.2.2 --
-it's `essential` that you install lxml or html5lib. Python's built-in
-HTML parser is just not very good in those old versions.
+using a very old version of Python -- earlier than 3.2.2 -- it's
+`essential` that you install lxml or html5lib. Python's built-in HTML
+parser is just not very good in those old versions.
 
 Note that if a document is invalid, different parsers will generate
 different Beautiful Soup trees for it. See `Differences
@@ -439,7 +402,7 @@
  no_list_soup.p['class']
  # 'body strikeout'
 
-You can use ```get_attribute_list`` to get a value that's always a
+You can use ``get_attribute_list`` to get a value that's always a
 list, whether or not it's a multi-valued atribute::
 
  id_soup.p.get_attribute_list('id')
@@ -481,8 +444,7 @@
 A ``NavigableString`` is just like a Python Unicode string, except
 that it also supports some of the features described in `Navigating
 the tree`_ and `Searching the tree`_. You can convert a
-``NavigableString`` to a Unicode string with ``unicode()`` (in
-Python 2) or ``str`` (in Python 3)::
+``NavigableString`` to a Unicode string with ``str``::
 
  unicode_string = str(tag.string)
  unicode_string
@@ -2097,22 +2059,35 @@
 ------------------
 
 ``PageElement.replace_with()`` removes a tag or string from the tree,
-and replaces it with the tag or string of your choice::
+and replaces it with one or more tags or strings of your choice::
 
  markup = '<a href="http://example.com/">I linked to <i>example.com</i></a>'
  soup = BeautifulSoup(markup, 'html.parser')
  a_tag = soup.a
 
  new_tag = soup.new_tag("b")
- new_tag.string = "example.net"
+ new_tag.string = "example.com"
  a_tag.i.replace_with(new_tag)
 
  a_tag
- # <a href="http://example.com/">I linked to <b>example.net</b></a>
+ # <a href="http://example.com/">I linked to <b>example.com</b></a>
 
-``replace_with()`` returns the tag or string that was replaced, so
+ bold_tag = soup.new_tag("b")
+ bold_tag.string = "example"
+ i_tag = soup.new_tag("i")
+ i_tag.string = "net"
+ a_tag.b.replace_with(bold_tag, ".", i_tag)
+
+ a_tag
+ # <a href="http://example.com/">I linked to <b>example</b>.<i>net</i></a>
+
+``replace_with()`` returns the tag or string that got replaced, so
 that you can examine it or add it back to another part of the tree.
 
+`The ability to pass multiple arguments into replace_with() is new
+in Beautiful Soup 4.10.0.`
+
+
 ``wrap()``
 ----------
 
@@ -2126,7 +2101,7 @@
  soup.p.wrap(soup.new_tag("div"))
  # <div><p><b>I wish I was bold.</b></p></div>
 
-This method is new in Beautiful Soup 4.0.5.
+`This method is new in Beautiful Soup 4.0.5.`
 
 ``unwrap()``
 ---------------------------
@@ -2177,7 +2152,7 @@
  #  A one, a two
  # </p>
 
-The ``smooth()`` method is new in Beautiful Soup 4.8.0.
+`This method is new in Beautiful Soup 4.8.0.`
 
 Output
 ======
@@ -2230,8 +2205,7 @@
 -------------------
 
 If you just want a string, with no fancy formatting, you can call
-``str()`` on a ``BeautifulSoup`` object (``unicode()`` in Python 2),
-or on a ``Tag`` within it::
+``str()`` on a ``BeautifulSoup`` object, or on a ``Tag`` within it::
 
  str(soup)
  # '<html><head></head><body><a href="http://example.com/">I linked to <i>example.com</i></a></body></html>'
@@ -2299,7 +2273,7 @@
  #  Il a dit &lt;&lt;Sacr&eacute; bleu!&gt;&gt;
  # </p>
 
-If you pass in ``formatter="html5"``, it's the same as
+If you pass in ``formatter="html5"``, it's similar to
 ``formatter="html"``, but Beautiful Soup will
 omit the closing slash in HTML void tags like "br"::
 
@@ -2310,6 +2284,18 @@
  
  print(br.encode(formatter="html5"))
  # b'<br>'
+
+In addition, any attributes whose values are the empty string
+will become HTML-style boolean attributes::
+
+ option = BeautifulSoup('<option selected=""></option>').option
+ print(option.encode(formatter="html"))
+ # b'<option selected=""></option>'
+ 
+ print(option.encode(formatter="html5"))
+ # b'<option selected></option>'
+
+*(This behavior is new as of Beautiful Soup 4.10.0.)*
  
 If you pass in ``formatter=None``, Beautiful Soup will not modify
 strings at all on output. This is the fastest option, but it may lead
@@ -2419,9 +2405,14 @@
 
 *As of Beautiful Soup version 4.9.0, when lxml or html.parser are in
 use, the contents of <script>, <style>, and <template>
-tags are not considered to be 'text', since those tags are not part of
+tags are generally not considered to be 'text', since those tags are not part of
 the human-visible content of the page.*
 
+*As of Beautiful Soup version 4.10.0, you can call get_text(),
+.strings, or .stripped_strings on a NavigableString object. It will
+either return the object itself, or nothing, so the only reason to do
+this is when you're iterating over a mixed list.*
+
  
 Specifying the parser to use
 ============================
@@ -3139,10 +3130,10 @@
 -------------------------
 
 * ``SyntaxError: Invalid syntax`` (on the line ``ROOT_TAG_NAME =
-  '[document]'``): Caused by running the Python 2 version of
+  '[document]'``): Caused by running an old Python 2 version of
   Beautiful Soup under Python 3, without converting the code.
 
-* ``ImportError: No module named HTMLParser`` - Caused by running the
+* ``ImportError: No module named HTMLParser`` - Caused by running an old
   Python 2 version of Beautiful Soup under Python 3.
 
 * ``ImportError: No module named html.parser`` - Caused by running the
diff -Nru beautifulsoup4-4.9.3/NEWS.txt beautifulsoup4-4.10.0/NEWS.txt
--- beautifulsoup4-4.9.3/NEWS.txt	2020-10-03 15:30:36.000000000 +0000
+++ beautifulsoup4-4.10.0/NEWS.txt	2021-09-08 00:09:00.000000000 +0000
@@ -1,3 +1,85 @@
+Beautiful Soup's official support for Python 2 ended on December 31st,
+2020. The final release to support Python 2 was Beautiful Soup
+4.9.3. In the Launchpad Bazaar repository, the final revision to support
+Python 2 was revision 605.
+
+= 4.10.0 (20210907)
+
+* This is the first release of Beautiful Soup to only support Python
+  3. I dropped Python 2 support to maintain support for newer versions
+  (58 and up) of setuptools. See:
+  https://github.com/pypa/setuptools/issues/2769 [bug=1942919]
+
+* The behavior of methods like .get_text() and .strings now differs
+  depending on the type of tag. The change is visible with HTML tags
+  like <script>, <style>, and <template>. Starting in 4.9.0, methods
+  like get_text() returned no results on such tags, because the
+  contents of those tags are not considered 'text' within the document
+  as a whole.
+
+  But a user who calls script.get_text() is working from a different
+  definition of 'text' than a user who calls div.get_text()--otherwise
+  there would be no need to call script.get_text() at all. In 4.10.0,
+  the contents of (e.g.) a <script> tag are considered 'text' during a
+  get_text() call on the tag itself, but not considered 'text' during
+  a get_text() call on the tag's parent.
+
+  Because of this change, calling get_text() on each child of a tag
+  may now return a different result than calling get_text() on the tag
+  itself. That's because different tags now have different
+  understandings of what counts as 'text'. [bug=1906226] [bug=1868861]
+
+* NavigableString and its subclasses now implement the get_text()
+  method, as well as the properties .strings and
+  .stripped_strings. These methods will either return the string
+  itself, or nothing, so the only reason to use them is when iterating
+  over a list of mixed Tag and NavigableString objects. [bug=1904309]
+
+* The 'html5' formatter now treats attributes whose values are the
+  empty string as HTML boolean attributes. Previously (and in other
+  formatters), an attribute value must be set as None to be treated as
+  a boolean attribute. In a future release, I plan to also give this
+  behavior to the 'html' formatter. Patch by Isaac Muse. [bug=1915424]
+
+* The 'replace_with()' method now takes a variable number of arguments,
+  and can be used to replace a single element with a sequence of elements.
+  Patch by Bill Chandos. [rev=605]
+
+* Corrected output when the namespace prefix associated with a
+  namespaced attribute is the empty string, as opposed to
+  None. [bug=1915583]
+
+* Performance improvement when processing tags that speeds up overall
+  tree construction by 2%. Patch by Morotti. [bug=1899358]
+
+* Corrected the use of special string container classes in cases when a
+  single tag may contain strings with different containers; such as
+  the <template> tag, which may contain both TemplateString objects
+  and Comment objects. [bug=1913406]
+
+* The html.parser tree builder can now handle named entities
+  found in the HTML5 spec in much the same way that the html5lib
+  tree builder does. Note that the lxml HTML tree builder doesn't handle
+  named entities this way. [bug=1924908]
+
+* Added a second way to specify encodings to UnicodeDammit and
+  EncodingDetector, based on the order of precedence defined in the
+  HTML5 spec, starting at:
+  https://html.spec.whatwg.org/multipage/parsing.html#parsing-with-a-known-character-encoding
+
+  Encodings in 'known_definite_encodings' are tried first, then
+  byte-order-mark sniffing is run, then encodings in 'user_encodings'
+  are tried. The old argument, 'override_encodings', is now a
+  deprecated alias for 'known_definite_encodings'.
+
+  This changes the default behavior of the html.parser and lxml tree
+  builders, in a way that may slightly improve encoding
+  detection but will probably have no effect. [bug=1889014]
+
+* Improve the warning issued when a directory name (as opposed to
+  the name of a regular file) is passed as markup into the BeautifulSoup
+  constructor. [bug=1913628]
+
 = 4.9.3 (20201003)
 
 * Implemented a significant performance optimization to the process of
diff -Nru beautifulsoup4-4.9.3/parse.txt beautifulsoup4-4.10.0/parse.txt
--- beautifulsoup4-4.9.3/parse.txt	1970-01-01 00:00:00.000000000 +0000
+++ beautifulsoup4-4.10.0/parse.txt	2021-02-13 02:20:52.000000000 +0000
@@ -0,0 +1,2048 @@
+Beautiful soup diagnose test:
+==============================================================================
+Diagnostic running on Beautiful Soup 4.9.3
+Python version 2.7.18 (default, Aug  4 2020, 11:16:42) 
+[GCC 9.3.0]
+Found lxml version 4.2.3.0
+Found html5lib version 1.0.1
+
+Trying to parse your markup with html.parser
+Here's what html.parser did with the markup:
+<?xml version='1.0' encoding='UTF-8'?>
+<package unique-identifier="id" version="2.0" xmlns="http://www.idpf.org/2007/opf" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:opf="http://www.idpf.org/2007/opf" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+ <metadata>
+  <dc:rights>
+   Public domain in the USA.
+  </dc:rights>
+  <dc:identifier id="id" opf:scheme="URI">
+   http://www.gutenberg.org/2600
+  </dc:identifier>
+  <dc:creator opf:file-as="Tolstoy, Leo, graf">
+   graf Leo Tolstoy
+  </dc:creator>
+  <dc:contributor opf:file-as="Maude, Aylmer" opf:role="trl">
+   Aylmer Maude
+  </dc:contributor>
+  <dc:contributor opf:file-as="Maude, Louise" opf:role="trl">
+   Louise Maude
+  </dc:contributor>
+  <dc:title>
+   War and Peace
+  </dc:title>
+  <dc:language xsi:type="dcterms:RFC4646">
+   en
+  </dc:language>
+  <dc:subject>
+   Historical fiction
+  </dc:subject>
+  <dc:subject>
+   War stories
+  </dc:subject>
+  <dc:subject>
+   Napoleonic Wars, 1800-1815 -- Campaigns -- Russia -- Fiction
+  </dc:subject>
+  <dc:subject>
+   Russia -- History -- Alexander I, 1801-1825 -- Fiction
+  </dc:subject>
+  <dc:subject>
+   Aristocracy (Social class) -- Russia -- Fiction
+  </dc:subject>
+  <dc:date opf:event="publication">
+   2001-04-01
+  </dc:date>
+  <dc:date opf:event="conversion">
+   2020-04-02T07:55:23.696736+00:00
+  </dc:date>
+  <dc:source>
+   https://www.gutenberg.org/files/2600/2600-h/2600-h.htm
+  </dc:source>
+  <meta content="item1" name="cover"/>
+ </metadata>
+ <manifest>
+  <!--Image: 484 x 700 size=108909 q=90-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@images@cover.jpg" id="item1" media-type="image/jpeg">
+  </item>
+  <item href="pgepub.css" id="item2" media-type="text/css">
+  </item>
+  <item href="0.css" id="item3" media-type="text/css">
+  </item>
+  <item href="1.css" id="item4" media-type="text/css">
+  </item>
+  <!--Chunk: size=57550 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-0.htm.html" id="item5" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=58304 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-1.htm.html" id="item6" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=52760 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-2.htm.html" id="item7" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=57074 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-3.htm.html" id="item8" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=52377 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-4.htm.html" id="item9" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=70163 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-5.htm.html" id="item10" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=60805 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-6.htm.html" id="item11" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=61815 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-7.htm.html" id="item12" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=51653 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-8.htm.html" id="item13" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=64801 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-9.htm.html" id="item14" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=51852 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-10.htm.html" id="item15" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=70173 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-11.htm.html" id="item16" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=63821 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-12.htm.html" id="item17" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=53220 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-13.htm.html" id="item18" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=51538 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-14.htm.html" id="item19" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=56744 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-15.htm.html" id="item20" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=53690 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-16.htm.html" id="item21" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=58241 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-17.htm.html" id="item22" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=57692 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-18.htm.html" id="item23" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=58285 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-19.htm.html" id="item24" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=57202 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-20.htm.html" id="item25" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=63300 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-21.htm.html" id="item26" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=60258 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-22.htm.html" id="item27" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=55844 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-23.htm.html" id="item28" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=54441 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-24.htm.html" id="item29" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=54570 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-25.htm.html" id="item30" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=52393 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-26.htm.html" id="item31" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=51987 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-27.htm.html" id="item32" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=67760 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-28.htm.html" id="item33" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=63989 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-29.htm.html" id="item34" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=63702 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-30.htm.html" id="item35" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=59862 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-31.htm.html" id="item36" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=54267 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-32.htm.html" id="item37" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=59292 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-33.htm.html" id="item38" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=56661 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-34.htm.html" id="item39" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=60083 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-35.htm.html" id="item40" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=56200 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-36.htm.html" id="item41" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=56136 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-37.htm.html" id="item42" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=59126 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-38.htm.html" id="item43" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=53080 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-39.htm.html" id="item44" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=54926 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-40.htm.html" id="item45" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=67086 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-41.htm.html" id="item46" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=57293 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-42.htm.html" id="item47" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=54513 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-43.htm.html" id="item48" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=64103 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-44.htm.html" id="item49" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=57653 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-45.htm.html" id="item50" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=55452 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-46.htm.html" id="item51" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=59269 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-47.htm.html" id="item52" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=53393 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-48.htm.html" id="item53" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=51746 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-49.htm.html" id="item54" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=58540 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-50.htm.html" id="item55" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=66538 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-51.htm.html" id="item56" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=53240 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-52.htm.html" id="item57" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=54375 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-53.htm.html" id="item58" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=54268 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-54.htm.html" id="item59" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=54373 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-55.htm.html" id="item60" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=58744 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-56.htm.html" id="item61" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=53208 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-57.htm.html" id="item62" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=53405 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-58.htm.html" id="item63" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=53793 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-59.htm.html" id="item64" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=54766 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-60.htm.html" id="item65" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=55485 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-61.htm.html" id="item66" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=51658 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-62.htm.html" id="item67" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=52806 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-63.htm.html" id="item68" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=58713 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-64.htm.html" id="item69" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=57610 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-65.htm.html" id="item70" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=60847 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-66.htm.html" id="item71" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=57840 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-67.htm.html" id="item72" media-type="application/xhtml+xml">
+  </item>
+  <!--Chunk: size=68925-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-68.htm.html" id="item73" media-type="application/xhtml+xml">
+  </item>
+  <item href="toc.ncx" id="ncx" media-type="application/x-dtbncx+xml">
+  </item>
+  <item href="wrap0000.html" id="coverpage-wrapper" media-type="application/xhtml+xml">
+  </item>
+ </manifest>
+ <spine toc="ncx">
+  <itemref idref="coverpage-wrapper" linear="yes">
+  </itemref>
+  <itemref idref="item5" linear="yes">
+  </itemref>
+  <itemref idref="item6" linear="yes">
+  </itemref>
+  <itemref idref="item7" linear="yes">
+  </itemref>
+  <itemref idref="item8" linear="yes">
+  </itemref>
+  <itemref idref="item9" linear="yes">
+  </itemref>
+  <itemref idref="item10" linear="yes">
+  </itemref>
+  <itemref idref="item11" linear="yes">
+  </itemref>
+  <itemref idref="item12" linear="yes">
+  </itemref>
+  <itemref idref="item13" linear="yes">
+  </itemref>
+  <itemref idref="item14" linear="yes">
+  </itemref>
+  <itemref idref="item15" linear="yes">
+  </itemref>
+  <itemref idref="item16" linear="yes">
+  </itemref>
+  <itemref idref="item17" linear="yes">
+  </itemref>
+  <itemref idref="item18" linear="yes">
+  </itemref>
+  <itemref idref="item19" linear="yes">
+  </itemref>
+  <itemref idref="item20" linear="yes">
+  </itemref>
+  <itemref idref="item21" linear="yes">
+  </itemref>
+  <itemref idref="item22" linear="yes">
+  </itemref>
+  <itemref idref="item23" linear="yes">
+  </itemref>
+  <itemref idref="item24" linear="yes">
+  </itemref>
+  <itemref idref="item25" linear="yes">
+  </itemref>
+  <itemref idref="item26" linear="yes">
+  </itemref>
+  <itemref idref="item27" linear="yes">
+  </itemref>
+  <itemref idref="item28" linear="yes">
+  </itemref>
+  <itemref idref="item29" linear="yes">
+  </itemref>
+  <itemref idref="item30" linear="yes">
+  </itemref>
+  <itemref idref="item31" linear="yes">
+  </itemref>
+  <itemref idref="item32" linear="yes">
+  </itemref>
+  <itemref idref="item33" linear="yes">
+  </itemref>
+  <itemref idref="item34" linear="yes">
+  </itemref>
+  <itemref idref="item35" linear="yes">
+  </itemref>
+  <itemref idref="item36" linear="yes">
+  </itemref>
+  <itemref idref="item37" linear="yes">
+  </itemref>
+  <itemref idref="item38" linear="yes">
+  </itemref>
+  <itemref idref="item39" linear="yes">
+  </itemref>
+  <itemref idref="item40" linear="yes">
+  </itemref>
+  <itemref idref="item41" linear="yes">
+  </itemref>
+  <itemref idref="item42" linear="yes">
+  </itemref>
+  <itemref idref="item43" linear="yes">
+  </itemref>
+  <itemref idref="item44" linear="yes">
+  </itemref>
+  <itemref idref="item45" linear="yes">
+  </itemref>
+  <itemref idref="item46" linear="yes">
+  </itemref>
+  <itemref idref="item47" linear="yes">
+  </itemref>
+  <itemref idref="item48" linear="yes">
+  </itemref>
+  <itemref idref="item49" linear="yes">
+  </itemref>
+  <itemref idref="item50" linear="yes">
+  </itemref>
+  <itemref idref="item51" linear="yes">
+  </itemref>
+  <itemref idref="item52" linear="yes">
+  </itemref>
+  <itemref idref="item53" linear="yes">
+  </itemref>
+  <itemref idref="item54" linear="yes">
+  </itemref>
+  <itemref idref="item55" linear="yes">
+  </itemref>
+  <itemref idref="item56" linear="yes">
+  </itemref>
+  <itemref idref="item57" linear="yes">
+  </itemref>
+  <itemref idref="item58" linear="yes">
+  </itemref>
+  <itemref idref="item59" linear="yes">
+  </itemref>
+  <itemref idref="item60" linear="yes">
+  </itemref>
+  <itemref idref="item61" linear="yes">
+  </itemref>
+  <itemref idref="item62" linear="yes">
+  </itemref>
+  <itemref idref="item63" linear="yes">
+  </itemref>
+  <itemref idref="item64" linear="yes">
+  </itemref>
+  <itemref idref="item65" linear="yes">
+  </itemref>
+  <itemref idref="item66" linear="yes">
+  </itemref>
+  <itemref idref="item67" linear="yes">
+  </itemref>
+  <itemref idref="item68" linear="yes">
+  </itemref>
+  <itemref idref="item69" linear="yes">
+  </itemref>
+  <itemref idref="item70" linear="yes">
+  </itemref>
+  <itemref idref="item71" linear="yes">
+  </itemref>
+  <itemref idref="item72" linear="yes">
+  </itemref>
+  <itemref idref="item73" linear="yes">
+  </itemref>
+ </spine>
+ <guide>
+  <reference href="wrap0000.html" title="Cover" type="cover">
+  </reference>
+ </guide>
+</package>
+--------------------------------------------------------------------------------
+Trying to parse your markup with html5lib
+Here's what html5lib did with the markup:
+<!--?xml version='1.0' encoding='UTF-8'?-->
+<html>
+ <head>
+ </head>
+ <body>
+  <package unique-identifier="id" version="2.0" xmlns="http://www.idpf.org/2007/opf" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:opf="http://www.idpf.org/2007/opf" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+   <metadata>
+    <dc:rights>
+     Public domain in the USA.
+    </dc:rights>
+    <dc:identifier id="id" opf:scheme="URI">
+     http://www.gutenberg.org/2600
+    </dc:identifier>
+    <dc:creator opf:file-as="Tolstoy, Leo, graf">
+     graf Leo Tolstoy
+    </dc:creator>
+    <dc:contributor opf:file-as="Maude, Aylmer" opf:role="trl">
+     Aylmer Maude
+    </dc:contributor>
+    <dc:contributor opf:file-as="Maude, Louise" opf:role="trl">
+     Louise Maude
+    </dc:contributor>
+    <dc:title>
+     War and Peace
+    </dc:title>
+    <dc:language xsi:type="dcterms:RFC4646">
+     en
+    </dc:language>
+    <dc:subject>
+     Historical fiction
+    </dc:subject>
+    <dc:subject>
+     War stories
+    </dc:subject>
+    <dc:subject>
+     Napoleonic Wars, 1800-1815 -- Campaigns -- Russia -- Fiction
+    </dc:subject>
+    <dc:subject>
+     Russia -- History -- Alexander I, 1801-1825 -- Fiction
+    </dc:subject>
+    <dc:subject>
+     Aristocracy (Social class) -- Russia -- Fiction
+    </dc:subject>
+    <dc:date opf:event="publication">
+     2001-04-01
+    </dc:date>
+    <dc:date opf:event="conversion">
+     2020-04-02T07:55:23.696736+00:00
+    </dc:date>
+    <dc:source>
+     https://www.gutenberg.org/files/2600/2600-h/2600-h.htm
+    </dc:source>
+    <meta content="item1" name="cover"/>
+   </metadata>
+   <manifest>
+    <!--Image: 484 x 700 size=108909 q=90-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@images@cover.jpg" id="item1" media-type="image/jpeg">
+     <item href="pgepub.css" id="item2" media-type="text/css">
+      <item href="0.css" id="item3" media-type="text/css">
+       <item href="1.css" id="item4" media-type="text/css">
+        <!--Chunk: size=57550 Split on div-->
+        <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-0.htm.html" id="item5" media-type="application/xhtml+xml">
+         <!--Chunk: size=58304 Split on div-->
+         <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-1.htm.html" id="item6" media-type="application/xhtml+xml">
+          <!--Chunk: size=52760 Split on div-->
+          <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-2.htm.html" id="item7" media-type="application/xhtml+xml">
+           <!--Chunk: size=57074 Split on div-->
+           <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-3.htm.html" id="item8" media-type="application/xhtml+xml">
+            <!--Chunk: size=52377 Split on div-->
+            <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-4.htm.html" id="item9" media-type="application/xhtml+xml">
+             <!--Chunk: size=70163 Split on div-->
+             <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-5.htm.html" id="item10" media-type="application/xhtml+xml">
+              <!--Chunk: size=60805 Split on div-->
+              <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-6.htm.html" id="item11" media-type="application/xhtml+xml">
+               <!--Chunk: size=61815 Split on div-->
+               <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-7.htm.html" id="item12" media-type="application/xhtml+xml">
+                <!--Chunk: size=51653 Split on div-->
+                <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-8.htm.html" id="item13" media-type="application/xhtml+xml">
+                 <!--Chunk: size=64801 Split on div-->
+                 <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-9.htm.html" id="item14" media-type="application/xhtml+xml">
+                  <!--Chunk: size=51852 Split on div-->
+                  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-10.htm.html" id="item15" media-type="application/xhtml+xml">
+                   <!--Chunk: size=70173 Split on div-->
+                   <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-11.htm.html" id="item16" media-type="application/xhtml+xml">
+                    <!--Chunk: size=63821 Split on div-->
+                    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-12.htm.html" id="item17" media-type="application/xhtml+xml">
+                     <!--Chunk: size=53220 Split on div-->
+                     <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-13.htm.html" id="item18" media-type="application/xhtml+xml">
+                      <!--Chunk: size=51538 Split on div-->
+                      <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-14.htm.html" id="item19" media-type="application/xhtml+xml">
+                       <!--Chunk: size=56744 Split on div-->
+                       <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-15.htm.html" id="item20" media-type="application/xhtml+xml">
+                        <!--Chunk: size=53690 Split on div-->
+                        <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-16.htm.html" id="item21" media-type="application/xhtml+xml">
+                         <!--Chunk: size=58241 Split on div-->
+                         <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-17.htm.html" id="item22" media-type="application/xhtml+xml">
+                          <!--Chunk: size=57692 Split on div-->
+                          <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-18.htm.html" id="item23" media-type="application/xhtml+xml">
+                           <!--Chunk: size=58285 Split on div-->
+                           <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-19.htm.html" id="item24" media-type="application/xhtml+xml">
+                            <!--Chunk: size=57202 Split on div-->
+                            <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-20.htm.html" id="item25" media-type="application/xhtml+xml">
+                             <!--Chunk: size=63300 Split on div-->
+                             <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-21.htm.html" id="item26" media-type="application/xhtml+xml">
+                              <!--Chunk: size=60258 Split on div-->
+                              <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-22.htm.html" id="item27" media-type="application/xhtml+xml">
+                               <!--Chunk: size=55844 Split on div-->
+                               <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-23.htm.html" id="item28" media-type="application/xhtml+xml">
+                                <!--Chunk: size=54441 Split on div-->
+                                <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-24.htm.html" id="item29" media-type="application/xhtml+xml">
+                                 <!--Chunk: size=54570 Split on div-->
+                                 <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-25.htm.html" id="item30" media-type="application/xhtml+xml">
+                                  <!--Chunk: size=52393 Split on div-->
+                                  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-26.htm.html" id="item31" media-type="application/xhtml+xml">
+                                   <!--Chunk: size=51987 Split on div-->
+                                   <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-27.htm.html" id="item32" media-type="application/xhtml+xml">
+                                    <!--Chunk: size=67760 Split on div-->
+                                    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-28.htm.html" id="item33" media-type="application/xhtml+xml">
+                                     <!--Chunk: size=63989 Split on div-->
+                                     <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-29.htm.html" id="item34" media-type="application/xhtml+xml">
+                                      <!--Chunk: size=63702 Split on div-->
+                                      <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-30.htm.html" id="item35" media-type="application/xhtml+xml">
+                                       <!--Chunk: size=59862 Split on div-->
+                                       <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-31.htm.html" id="item36" media-type="application/xhtml+xml">
+                                        <!--Chunk: size=54267 Split on div-->
+                                        <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-32.htm.html" id="item37" media-type="application/xhtml+xml">
+                                         <!--Chunk: size=59292 Split on div-->
+                                         <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-33.htm.html" id="item38" media-type="application/xhtml+xml">
+                                          <!--Chunk: size=56661 Split on div-->
+                                          <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-34.htm.html" id="item39" media-type="application/xhtml+xml">
+                                           <!--Chunk: size=60083 Split on div-->
+                                           <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-35.htm.html" id="item40" media-type="application/xhtml+xml">
+                                            <!--Chunk: size=56200 Split on div-->
+                                            <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-36.htm.html" id="item41" media-type="application/xhtml+xml">
+                                             <!--Chunk: size=56136 Split on div-->
+                                             <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-37.htm.html" id="item42" media-type="application/xhtml+xml">
+                                              <!--Chunk: size=59126 Split on div-->
+                                              <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-38.htm.html" id="item43" media-type="application/xhtml+xml">
+                                               <!--Chunk: size=53080 Split on div-->
+                                               <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-39.htm.html" id="item44" media-type="application/xhtml+xml">
+                                                <!--Chunk: size=54926 Split on div-->
+                                                <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-40.htm.html" id="item45" media-type="application/xhtml+xml">
+                                                 <!--Chunk: size=67086 Split on div-->
+                                                 <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-41.htm.html" id="item46" media-type="application/xhtml+xml">
+                                                  <!--Chunk: size=57293 Split on div-->
+                                                  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-42.htm.html" id="item47" media-type="application/xhtml+xml">
+                                                   <!--Chunk: size=54513 Split on div-->
+                                                   <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-43.htm.html" id="item48" media-type="application/xhtml+xml">
+                                                    <!--Chunk: size=64103 Split on div-->
+                                                    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-44.htm.html" id="item49" media-type="application/xhtml+xml">
+                                                     <!--Chunk: size=57653 Split on div-->
+                                                     <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-45.htm.html" id="item50" media-type="application/xhtml+xml">
+                                                      <!--Chunk: size=55452 Split on div-->
+                                                      <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-46.htm.html" id="item51" media-type="application/xhtml+xml">
+                                                       <!--Chunk: size=59269 Split on div-->
+                                                       <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-47.htm.html" id="item52" media-type="application/xhtml+xml">
+                                                        <!--Chunk: size=53393 Split on div-->
+                                                        <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-48.htm.html" id="item53" media-type="application/xhtml+xml">
+                                                         <!--Chunk: size=51746 Split on div-->
+                                                         <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-49.htm.html" id="item54" media-type="application/xhtml+xml">
+                                                          <!--Chunk: size=58540 Split on div-->
+                                                          <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-50.htm.html" id="item55" media-type="application/xhtml+xml">
+                                                           <!--Chunk: size=66538 Split on div-->
+                                                           <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-51.htm.html" id="item56" media-type="application/xhtml+xml">
+                                                            <!--Chunk: size=53240 Split on div-->
+                                                            <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-52.htm.html" id="item57" media-type="application/xhtml+xml">
+                                                             <!--Chunk: size=54375 Split on div-->
+                                                             <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-53.htm.html" id="item58" media-type="application/xhtml+xml">
+                                                              <!--Chunk: size=54268 Split on div-->
+                                                              <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-54.htm.html" id="item59" media-type="application/xhtml+xml">
+                                                               <!--Chunk: size=54373 Split on div-->
+                                                               <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-55.htm.html" id="item60" media-type="application/xhtml+xml">
+                                                                <!--Chunk: size=58744 Split on div-->
+                                                                <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-56.htm.html" id="item61" media-type="application/xhtml+xml">
+                                                                 <!--Chunk: size=53208 Split on div-->
+                                                                 <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-57.htm.html" id="item62" media-type="application/xhtml+xml">
+                                                                  <!--Chunk: size=53405 Split on div-->
+                                                                  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-58.htm.html" id="item63" media-type="application/xhtml+xml">
+                                                                   <!--Chunk: size=53793 Split on div-->
+                                                                   <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-59.htm.html" id="item64" media-type="application/xhtml+xml">
+                                                                    <!--Chunk: size=54766 Split on div-->
+                                                                    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-60.htm.html" id="item65" media-type="application/xhtml+xml">
+                                                                     <!--Chunk: size=55485 Split on div-->
+                                                                     <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-61.htm.html" id="item66" media-type="application/xhtml+xml">
+                                                                      <!--Chunk: size=51658 Split on div-->
+                                                                      <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-62.htm.html" id="item67" media-type="application/xhtml+xml">
+                                                                       <!--Chunk: size=52806 Split on div-->
+                                                                       <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-63.htm.html" id="item68" media-type="application/xhtml+xml">
+                                                                        <!--Chunk: size=58713 Split on div-->
+                                                                        <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-64.htm.html" id="item69" media-type="application/xhtml+xml">
+                                                                         <!--Chunk: size=57610 Split on div-->
+                                                                         <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-65.htm.html" id="item70" media-type="application/xhtml+xml">
+                                                                          <!--Chunk: size=60847 Split on div-->
+                                                                          <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-66.htm.html" id="item71" media-type="application/xhtml+xml">
+                                                                           <!--Chunk: size=57840 Split on div-->
+                                                                           <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-67.htm.html" id="item72" media-type="application/xhtml+xml">
+                                                                            <!--Chunk: size=68925-->
+                                                                            <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-68.htm.html" id="item73" media-type="application/xhtml+xml">
+                                                                             <item href="toc.ncx" id="ncx" media-type="application/x-dtbncx+xml">
+                                                                              <item href="wrap0000.html" id="coverpage-wrapper" media-type="application/xhtml+xml">
+                                                                              </item>
+                                                                             </item>
+                                                                            </item>
+                                                                           </item>
+                                                                          </item>
+                                                                         </item>
+                                                                        </item>
+                                                                       </item>
+                                                                      </item>
+                                                                     </item>
+                                                                    </item>
+                                                                   </item>
+                                                                  </item>
+                                                                 </item>
+                                                                </item>
+                                                               </item>
+                                                              </item>
+                                                             </item>
+                                                            </item>
+                                                           </item>
+                                                          </item>
+                                                         </item>
+                                                        </item>
+                                                       </item>
+                                                      </item>
+                                                     </item>
+                                                    </item>
+                                                   </item>
+                                                  </item>
+                                                 </item>
+                                                </item>
+                                               </item>
+                                              </item>
+                                             </item>
+                                            </item>
+                                           </item>
+                                          </item>
+                                         </item>
+                                        </item>
+                                       </item>
+                                      </item>
+                                     </item>
+                                    </item>
+                                   </item>
+                                  </item>
+                                 </item>
+                                </item>
+                               </item>
+                              </item>
+                             </item>
+                            </item>
+                           </item>
+                          </item>
+                         </item>
+                        </item>
+                       </item>
+                      </item>
+                     </item>
+                    </item>
+                   </item>
+                  </item>
+                 </item>
+                </item>
+               </item>
+              </item>
+             </item>
+            </item>
+           </item>
+          </item>
+         </item>
+        </item>
+       </item>
+      </item>
+     </item>
+    </item>
+   </manifest>
+   <spine toc="ncx">
+    <itemref idref="coverpage-wrapper" linear="yes">
+     <itemref idref="item5" linear="yes">
+      <itemref idref="item6" linear="yes">
+       <itemref idref="item7" linear="yes">
+        <itemref idref="item8" linear="yes">
+         <itemref idref="item9" linear="yes">
+          <itemref idref="item10" linear="yes">
+           <itemref idref="item11" linear="yes">
+            <itemref idref="item12" linear="yes">
+             <itemref idref="item13" linear="yes">
+              <itemref idref="item14" linear="yes">
+               <itemref idref="item15" linear="yes">
+                <itemref idref="item16" linear="yes">
+                 <itemref idref="item17" linear="yes">
+                  <itemref idref="item18" linear="yes">
+                   <itemref idref="item19" linear="yes">
+                    <itemref idref="item20" linear="yes">
+                     <itemref idref="item21" linear="yes">
+                      <itemref idref="item22" linear="yes">
+                       <itemref idref="item23" linear="yes">
+                        <itemref idref="item24" linear="yes">
+                         <itemref idref="item25" linear="yes">
+                          <itemref idref="item26" linear="yes">
+                           <itemref idref="item27" linear="yes">
+                            <itemref idref="item28" linear="yes">
+                             <itemref idref="item29" linear="yes">
+                              <itemref idref="item30" linear="yes">
+                               <itemref idref="item31" linear="yes">
+                                <itemref idref="item32" linear="yes">
+                                 <itemref idref="item33" linear="yes">
+                                  <itemref idref="item34" linear="yes">
+                                   <itemref idref="item35" linear="yes">
+                                    <itemref idref="item36" linear="yes">
+                                     <itemref idref="item37" linear="yes">
+                                      <itemref idref="item38" linear="yes">
+                                       <itemref idref="item39" linear="yes">
+                                        <itemref idref="item40" linear="yes">
+                                         <itemref idref="item41" linear="yes">
+                                          <itemref idref="item42" linear="yes">
+                                           <itemref idref="item43" linear="yes">
+                                            <itemref idref="item44" linear="yes">
+                                             <itemref idref="item45" linear="yes">
+                                              <itemref idref="item46" linear="yes">
+                                               <itemref idref="item47" linear="yes">
+                                                <itemref idref="item48" linear="yes">
+                                                 <itemref idref="item49" linear="yes">
+                                                  <itemref idref="item50" linear="yes">
+                                                   <itemref idref="item51" linear="yes">
+                                                    <itemref idref="item52" linear="yes">
+                                                     <itemref idref="item53" linear="yes">
+                                                      <itemref idref="item54" linear="yes">
+                                                       <itemref idref="item55" linear="yes">
+                                                        <itemref idref="item56" linear="yes">
+                                                         <itemref idref="item57" linear="yes">
+                                                          <itemref idref="item58" linear="yes">
+                                                           <itemref idref="item59" linear="yes">
+                                                            <itemref idref="item60" linear="yes">
+                                                             <itemref idref="item61" linear="yes">
+                                                              <itemref idref="item62" linear="yes">
+                                                               <itemref idref="item63" linear="yes">
+                                                                <itemref idref="item64" linear="yes">
+                                                                 <itemref idref="item65" linear="yes">
+                                                                  <itemref idref="item66" linear="yes">
+                                                                   <itemref idref="item67" linear="yes">
+                                                                    <itemref idref="item68" linear="yes">
+                                                                     <itemref idref="item69" linear="yes">
+                                                                      <itemref idref="item70" linear="yes">
+                                                                       <itemref idref="item71" linear="yes">
+                                                                        <itemref idref="item72" linear="yes">
+                                                                         <itemref idref="item73" linear="yes">
+                                                                         </itemref>
+                                                                        </itemref>
+                                                                       </itemref>
+                                                                      </itemref>
+                                                                     </itemref>
+                                                                    </itemref>
+                                                                   </itemref>
+                                                                  </itemref>
+                                                                 </itemref>
+                                                                </itemref>
+                                                               </itemref>
+                                                              </itemref>
+                                                             </itemref>
+                                                            </itemref>
+                                                           </itemref>
+                                                          </itemref>
+                                                         </itemref>
+                                                        </itemref>
+                                                       </itemref>
+                                                      </itemref>
+                                                     </itemref>
+                                                    </itemref>
+                                                   </itemref>
+                                                  </itemref>
+                                                 </itemref>
+                                                </itemref>
+                                               </itemref>
+                                              </itemref>
+                                             </itemref>
+                                            </itemref>
+                                           </itemref>
+                                          </itemref>
+                                         </itemref>
+                                        </itemref>
+                                       </itemref>
+                                      </itemref>
+                                     </itemref>
+                                    </itemref>
+                                   </itemref>
+                                  </itemref>
+                                 </itemref>
+                                </itemref>
+                               </itemref>
+                              </itemref>
+                             </itemref>
+                            </itemref>
+                           </itemref>
+                          </itemref>
+                         </itemref>
+                        </itemref>
+                       </itemref>
+                      </itemref>
+                     </itemref>
+                    </itemref>
+                   </itemref>
+                  </itemref>
+                 </itemref>
+                </itemref>
+               </itemref>
+              </itemref>
+             </itemref>
+            </itemref>
+           </itemref>
+          </itemref>
+         </itemref>
+        </itemref>
+       </itemref>
+      </itemref>
+     </itemref>
+    </itemref>
+   </spine>
+   <guide>
+    <reference href="wrap0000.html" title="Cover" type="cover">
+    </reference>
+   </guide>
+  </package>
+ </body>
+</html>
+--------------------------------------------------------------------------------
+Trying to parse your markup with lxml
+Here's what lxml did with the markup:
+<?xml version='1.0' encoding='UTF-8'?>
+<html>
+ <body>
+  <package unique-identifier="id" version="2.0" xmlns="http://www.idpf.org/2007/opf" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:opf="http://www.idpf.org/2007/opf" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+   <metadata>
+    <dc:rights>
+     Public domain in the USA.
+    </dc:rights>
+    <dc:identifier id="id" opf:scheme="URI">
+     http://www.gutenberg.org/2600
+    </dc:identifier>
+    <dc:creator opf:file-as="Tolstoy, Leo, graf">
+     graf Leo Tolstoy
+    </dc:creator>
+    <dc:contributor opf:file-as="Maude, Aylmer" opf:role="trl">
+     Aylmer Maude
+    </dc:contributor>
+    <dc:contributor opf:file-as="Maude, Louise" opf:role="trl">
+     Louise Maude
+    </dc:contributor>
+    <dc:title>
+     War and Peace
+    </dc:title>
+    <dc:language xsi:type="dcterms:RFC4646">
+     en
+    </dc:language>
+    <dc:subject>
+     Historical fiction
+    </dc:subject>
+    <dc:subject>
+     War stories
+    </dc:subject>
+    <dc:subject>
+     Napoleonic Wars, 1800-1815 -- Campaigns -- Russia -- Fiction
+    </dc:subject>
+    <dc:subject>
+     Russia -- History -- Alexander I, 1801-1825 -- Fiction
+    </dc:subject>
+    <dc:subject>
+     Aristocracy (Social class) -- Russia -- Fiction
+    </dc:subject>
+    <dc:date opf:event="publication">
+     2001-04-01
+    </dc:date>
+    <dc:date opf:event="conversion">
+     2020-04-02T07:55:23.696736+00:00
+    </dc:date>
+    <dc:source>
+     https://www.gutenberg.org/files/2600/2600-h/2600-h.htm
+    </dc:source>
+    <meta content="item1" name="cover"/>
+   </metadata>
+   <manifest>
+    <!--Image: 484 x 700 size=108909 q=90-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@images@cover.jpg" id="item1" media-type="image/jpeg">
+    </item>
+    <item href="pgepub.css" id="item2" media-type="text/css">
+    </item>
+    <item href="0.css" id="item3" media-type="text/css">
+    </item>
+    <item href="1.css" id="item4" media-type="text/css">
+    </item>
+    <!--Chunk: size=57550 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-0.htm.html" id="item5" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=58304 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-1.htm.html" id="item6" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=52760 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-2.htm.html" id="item7" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=57074 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-3.htm.html" id="item8" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=52377 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-4.htm.html" id="item9" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=70163 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-5.htm.html" id="item10" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=60805 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-6.htm.html" id="item11" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=61815 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-7.htm.html" id="item12" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=51653 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-8.htm.html" id="item13" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=64801 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-9.htm.html" id="item14" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=51852 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-10.htm.html" id="item15" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=70173 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-11.htm.html" id="item16" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=63821 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-12.htm.html" id="item17" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=53220 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-13.htm.html" id="item18" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=51538 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-14.htm.html" id="item19" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=56744 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-15.htm.html" id="item20" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=53690 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-16.htm.html" id="item21" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=58241 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-17.htm.html" id="item22" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=57692 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-18.htm.html" id="item23" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=58285 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-19.htm.html" id="item24" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=57202 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-20.htm.html" id="item25" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=63300 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-21.htm.html" id="item26" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=60258 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-22.htm.html" id="item27" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=55844 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-23.htm.html" id="item28" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=54441 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-24.htm.html" id="item29" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=54570 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-25.htm.html" id="item30" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=52393 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-26.htm.html" id="item31" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=51987 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-27.htm.html" id="item32" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=67760 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-28.htm.html" id="item33" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=63989 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-29.htm.html" id="item34" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=63702 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-30.htm.html" id="item35" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=59862 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-31.htm.html" id="item36" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=54267 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-32.htm.html" id="item37" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=59292 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-33.htm.html" id="item38" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=56661 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-34.htm.html" id="item39" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=60083 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-35.htm.html" id="item40" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=56200 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-36.htm.html" id="item41" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=56136 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-37.htm.html" id="item42" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=59126 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-38.htm.html" id="item43" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=53080 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-39.htm.html" id="item44" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=54926 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-40.htm.html" id="item45" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=67086 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-41.htm.html" id="item46" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=57293 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-42.htm.html" id="item47" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=54513 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-43.htm.html" id="item48" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=64103 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-44.htm.html" id="item49" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=57653 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-45.htm.html" id="item50" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=55452 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-46.htm.html" id="item51" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=59269 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-47.htm.html" id="item52" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=53393 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-48.htm.html" id="item53" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=51746 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-49.htm.html" id="item54" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=58540 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-50.htm.html" id="item55" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=66538 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-51.htm.html" id="item56" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=53240 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-52.htm.html" id="item57" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=54375 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-53.htm.html" id="item58" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=54268 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-54.htm.html" id="item59" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=54373 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-55.htm.html" id="item60" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=58744 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-56.htm.html" id="item61" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=53208 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-57.htm.html" id="item62" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=53405 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-58.htm.html" id="item63" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=53793 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-59.htm.html" id="item64" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=54766 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-60.htm.html" id="item65" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=55485 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-61.htm.html" id="item66" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=51658 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-62.htm.html" id="item67" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=52806 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-63.htm.html" id="item68" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=58713 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-64.htm.html" id="item69" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=57610 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-65.htm.html" id="item70" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=60847 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-66.htm.html" id="item71" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=57840 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-67.htm.html" id="item72" media-type="application/xhtml+xml">
+    </item>
+    <!--Chunk: size=68925-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-68.htm.html" id="item73" media-type="application/xhtml+xml">
+    </item>
+    <item href="toc.ncx" id="ncx" media-type="application/x-dtbncx+xml">
+    </item>
+    <item href="wrap0000.html" id="coverpage-wrapper" media-type="application/xhtml+xml">
+    </item>
+   </manifest>
+   <spine toc="ncx">
+    <itemref idref="coverpage-wrapper" linear="yes">
+    </itemref>
+    <itemref idref="item5" linear="yes">
+    </itemref>
+    <itemref idref="item6" linear="yes">
+    </itemref>
+    <itemref idref="item7" linear="yes">
+    </itemref>
+    <itemref idref="item8" linear="yes">
+    </itemref>
+    <itemref idref="item9" linear="yes">
+    </itemref>
+    <itemref idref="item10" linear="yes">
+    </itemref>
+    <itemref idref="item11" linear="yes">
+    </itemref>
+    <itemref idref="item12" linear="yes">
+    </itemref>
+    <itemref idref="item13" linear="yes">
+    </itemref>
+    <itemref idref="item14" linear="yes">
+    </itemref>
+    <itemref idref="item15" linear="yes">
+    </itemref>
+    <itemref idref="item16" linear="yes">
+    </itemref>
+    <itemref idref="item17" linear="yes">
+    </itemref>
+    <itemref idref="item18" linear="yes">
+    </itemref>
+    <itemref idref="item19" linear="yes">
+    </itemref>
+    <itemref idref="item20" linear="yes">
+    </itemref>
+    <itemref idref="item21" linear="yes">
+    </itemref>
+    <itemref idref="item22" linear="yes">
+    </itemref>
+    <itemref idref="item23" linear="yes">
+    </itemref>
+    <itemref idref="item24" linear="yes">
+    </itemref>
+    <itemref idref="item25" linear="yes">
+    </itemref>
+    <itemref idref="item26" linear="yes">
+    </itemref>
+    <itemref idref="item27" linear="yes">
+    </itemref>
+    <itemref idref="item28" linear="yes">
+    </itemref>
+    <itemref idref="item29" linear="yes">
+    </itemref>
+    <itemref idref="item30" linear="yes">
+    </itemref>
+    <itemref idref="item31" linear="yes">
+    </itemref>
+    <itemref idref="item32" linear="yes">
+    </itemref>
+    <itemref idref="item33" linear="yes">
+    </itemref>
+    <itemref idref="item34" linear="yes">
+    </itemref>
+    <itemref idref="item35" linear="yes">
+    </itemref>
+    <itemref idref="item36" linear="yes">
+    </itemref>
+    <itemref idref="item37" linear="yes">
+    </itemref>
+    <itemref idref="item38" linear="yes">
+    </itemref>
+    <itemref idref="item39" linear="yes">
+    </itemref>
+    <itemref idref="item40" linear="yes">
+    </itemref>
+    <itemref idref="item41" linear="yes">
+    </itemref>
+    <itemref idref="item42" linear="yes">
+    </itemref>
+    <itemref idref="item43" linear="yes">
+    </itemref>
+    <itemref idref="item44" linear="yes">
+    </itemref>
+    <itemref idref="item45" linear="yes">
+    </itemref>
+    <itemref idref="item46" linear="yes">
+    </itemref>
+    <itemref idref="item47" linear="yes">
+    </itemref>
+    <itemref idref="item48" linear="yes">
+    </itemref>
+    <itemref idref="item49" linear="yes">
+    </itemref>
+    <itemref idref="item50" linear="yes">
+    </itemref>
+    <itemref idref="item51" linear="yes">
+    </itemref>
+    <itemref idref="item52" linear="yes">
+    </itemref>
+    <itemref idref="item53" linear="yes">
+    </itemref>
+    <itemref idref="item54" linear="yes">
+    </itemref>
+    <itemref idref="item55" linear="yes">
+    </itemref>
+    <itemref idref="item56" linear="yes">
+    </itemref>
+    <itemref idref="item57" linear="yes">
+    </itemref>
+    <itemref idref="item58" linear="yes">
+    </itemref>
+    <itemref idref="item59" linear="yes">
+    </itemref>
+    <itemref idref="item60" linear="yes">
+    </itemref>
+    <itemref idref="item61" linear="yes">
+    </itemref>
+    <itemref idref="item62" linear="yes">
+    </itemref>
+    <itemref idref="item63" linear="yes">
+    </itemref>
+    <itemref idref="item64" linear="yes">
+    </itemref>
+    <itemref idref="item65" linear="yes">
+    </itemref>
+    <itemref idref="item66" linear="yes">
+    </itemref>
+    <itemref idref="item67" linear="yes">
+    </itemref>
+    <itemref idref="item68" linear="yes">
+    </itemref>
+    <itemref idref="item69" linear="yes">
+    </itemref>
+    <itemref idref="item70" linear="yes">
+    </itemref>
+    <itemref idref="item71" linear="yes">
+    </itemref>
+    <itemref idref="item72" linear="yes">
+    </itemref>
+    <itemref idref="item73" linear="yes">
+    </itemref>
+   </spine>
+   <guide>
+    <reference href="wrap0000.html" title="Cover" type="cover">
+    </reference>
+   </guide>
+  </package>
+ </body>
+</html>
+--------------------------------------------------------------------------------
+Trying to parse your markup with lxml-xml
+Here's what lxml-xml did with the markup:
+<?xml version="1.0" encoding="utf-8"?>
+<package unique-identifier="id" version="2.0" xmlns="http://www.idpf.org/2007/opf" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:opf="http://www.idpf.org/2007/opf" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+ <metadata>
+  <dc:rights>
+   Public domain in the USA.
+  </dc:rights>
+  <dc:identifier id="id" scheme="URI">
+   http://www.gutenberg.org/2600
+  </dc:identifier>
+  <dc:creator file-as="Tolstoy, Leo, graf">
+   graf Leo Tolstoy
+  </dc:creator>
+  <dc:contributor file-as="Maude, Aylmer" role="trl">
+   Aylmer Maude
+  </dc:contributor>
+  <dc:contributor file-as="Maude, Louise" role="trl">
+   Louise Maude
+  </dc:contributor>
+  <dc:title>
+   War and Peace
+  </dc:title>
+  <dc:language xsi:type="dcterms:RFC4646">
+   en
+  </dc:language>
+  <dc:subject>
+   Historical fiction
+  </dc:subject>
+  <dc:subject>
+   War stories
+  </dc:subject>
+  <dc:subject>
+   Napoleonic Wars, 1800-1815 -- Campaigns -- Russia -- Fiction
+  </dc:subject>
+  <dc:subject>
+   Russia -- History -- Alexander I, 1801-1825 -- Fiction
+  </dc:subject>
+  <dc:subject>
+   Aristocracy (Social class) -- Russia -- Fiction
+  </dc:subject>
+  <dc:date event="publication">
+   2001-04-01
+  </dc:date>
+  <dc:date event="conversion">
+   2020-04-02T07:55:23.696736+00:00
+  </dc:date>
+  <dc:source>
+   https://www.gutenberg.org/files/2600/2600-h/2600-h.htm
+  </dc:source>
+  <meta content="item1" name="cover"/>
+ </metadata>
+ <manifest>
+  <!--Image: 484 x 700 size=108909 q=90-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@images@cover.jpg" id="item1" media-type="image/jpeg"/>
+  <item href="pgepub.css" id="item2" media-type="text/css"/>
+  <item href="0.css" id="item3" media-type="text/css"/>
+  <item href="1.css" id="item4" media-type="text/css"/>
+  <!--Chunk: size=57550 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-0.htm.html" id="item5" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=58304 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-1.htm.html" id="item6" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=52760 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-2.htm.html" id="item7" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=57074 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-3.htm.html" id="item8" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=52377 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-4.htm.html" id="item9" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=70163 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-5.htm.html" id="item10" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=60805 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-6.htm.html" id="item11" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=61815 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-7.htm.html" id="item12" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=51653 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-8.htm.html" id="item13" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=64801 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-9.htm.html" id="item14" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=51852 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-10.htm.html" id="item15" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=70173 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-11.htm.html" id="item16" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=63821 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-12.htm.html" id="item17" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=53220 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-13.htm.html" id="item18" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=51538 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-14.htm.html" id="item19" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=56744 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-15.htm.html" id="item20" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=53690 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-16.htm.html" id="item21" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=58241 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-17.htm.html" id="item22" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=57692 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-18.htm.html" id="item23" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=58285 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-19.htm.html" id="item24" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=57202 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-20.htm.html" id="item25" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=63300 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-21.htm.html" id="item26" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=60258 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-22.htm.html" id="item27" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=55844 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-23.htm.html" id="item28" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=54441 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-24.htm.html" id="item29" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=54570 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-25.htm.html" id="item30" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=52393 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-26.htm.html" id="item31" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=51987 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-27.htm.html" id="item32" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=67760 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-28.htm.html" id="item33" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=63989 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-29.htm.html" id="item34" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=63702 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-30.htm.html" id="item35" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=59862 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-31.htm.html" id="item36" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=54267 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-32.htm.html" id="item37" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=59292 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-33.htm.html" id="item38" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=56661 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-34.htm.html" id="item39" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=60083 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-35.htm.html" id="item40" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=56200 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-36.htm.html" id="item41" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=56136 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-37.htm.html" id="item42" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=59126 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-38.htm.html" id="item43" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=53080 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-39.htm.html" id="item44" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=54926 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-40.htm.html" id="item45" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=67086 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-41.htm.html" id="item46" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=57293 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-42.htm.html" id="item47" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=54513 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-43.htm.html" id="item48" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=64103 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-44.htm.html" id="item49" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=57653 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-45.htm.html" id="item50" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=55452 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-46.htm.html" id="item51" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=59269 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-47.htm.html" id="item52" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=53393 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-48.htm.html" id="item53" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=51746 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-49.htm.html" id="item54" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=58540 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-50.htm.html" id="item55" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=66538 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-51.htm.html" id="item56" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=53240 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-52.htm.html" id="item57" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=54375 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-53.htm.html" id="item58" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=54268 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-54.htm.html" id="item59" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=54373 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-55.htm.html" id="item60" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=58744 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-56.htm.html" id="item61" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=53208 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-57.htm.html" id="item62" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=53405 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-58.htm.html" id="item63" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=53793 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-59.htm.html" id="item64" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=54766 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-60.htm.html" id="item65" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=55485 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-61.htm.html" id="item66" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=51658 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-62.htm.html" id="item67" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=52806 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-63.htm.html" id="item68" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=58713 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-64.htm.html" id="item69" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=57610 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-65.htm.html" id="item70" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=60847 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-66.htm.html" id="item71" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=57840 Split on div-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-67.htm.html" id="item72" media-type="application/xhtml+xml"/>
+  <!--Chunk: size=68925-->
+  <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-68.htm.html" id="item73" media-type="application/xhtml+xml"/>
+  <item href="toc.ncx" id="ncx" media-type="application/x-dtbncx+xml"/>
+  <item href="wrap0000.html" id="coverpage-wrapper" media-type="application/xhtml+xml"/>
+ </manifest>
+ <spine toc="ncx">
+  <itemref idref="coverpage-wrapper" linear="yes"/>
+  <itemref idref="item5" linear="yes"/>
+  <itemref idref="item6" linear="yes"/>
+  <itemref idref="item7" linear="yes"/>
+  <itemref idref="item8" linear="yes"/>
+  <itemref idref="item9" linear="yes"/>
+  <itemref idref="item10" linear="yes"/>
+  <itemref idref="item11" linear="yes"/>
+  <itemref idref="item12" linear="yes"/>
+  <itemref idref="item13" linear="yes"/>
+  <itemref idref="item14" linear="yes"/>
+  <itemref idref="item15" linear="yes"/>
+  <itemref idref="item16" linear="yes"/>
+  <itemref idref="item17" linear="yes"/>
+  <itemref idref="item18" linear="yes"/>
+  <itemref idref="item19" linear="yes"/>
+  <itemref idref="item20" linear="yes"/>
+  <itemref idref="item21" linear="yes"/>
+  <itemref idref="item22" linear="yes"/>
+  <itemref idref="item23" linear="yes"/>
+  <itemref idref="item24" linear="yes"/>
+  <itemref idref="item25" linear="yes"/>
+  <itemref idref="item26" linear="yes"/>
+  <itemref idref="item27" linear="yes"/>
+  <itemref idref="item28" linear="yes"/>
+  <itemref idref="item29" linear="yes"/>
+  <itemref idref="item30" linear="yes"/>
+  <itemref idref="item31" linear="yes"/>
+  <itemref idref="item32" linear="yes"/>
+  <itemref idref="item33" linear="yes"/>
+  <itemref idref="item34" linear="yes"/>
+  <itemref idref="item35" linear="yes"/>
+  <itemref idref="item36" linear="yes"/>
+  <itemref idref="item37" linear="yes"/>
+  <itemref idref="item38" linear="yes"/>
+  <itemref idref="item39" linear="yes"/>
+  <itemref idref="item40" linear="yes"/>
+  <itemref idref="item41" linear="yes"/>
+  <itemref idref="item42" linear="yes"/>
+  <itemref idref="item43" linear="yes"/>
+  <itemref idref="item44" linear="yes"/>
+  <itemref idref="item45" linear="yes"/>
+  <itemref idref="item46" linear="yes"/>
+  <itemref idref="item47" linear="yes"/>
+  <itemref idref="item48" linear="yes"/>
+  <itemref idref="item49" linear="yes"/>
+  <itemref idref="item50" linear="yes"/>
+  <itemref idref="item51" linear="yes"/>
+  <itemref idref="item52" linear="yes"/>
+  <itemref idref="item53" linear="yes"/>
+  <itemref idref="item54" linear="yes"/>
+  <itemref idref="item55" linear="yes"/>
+  <itemref idref="item56" linear="yes"/>
+  <itemref idref="item57" linear="yes"/>
+  <itemref idref="item58" linear="yes"/>
+  <itemref idref="item59" linear="yes"/>
+  <itemref idref="item60" linear="yes"/>
+  <itemref idref="item61" linear="yes"/>
+  <itemref idref="item62" linear="yes"/>
+  <itemref idref="item63" linear="yes"/>
+  <itemref idref="item64" linear="yes"/>
+  <itemref idref="item65" linear="yes"/>
+  <itemref idref="item66" linear="yes"/>
+  <itemref idref="item67" linear="yes"/>
+  <itemref idref="item68" linear="yes"/>
+  <itemref idref="item69" linear="yes"/>
+  <itemref idref="item70" linear="yes"/>
+  <itemref idref="item71" linear="yes"/>
+  <itemref idref="item72" linear="yes"/>
+  <itemref idref="item73" linear="yes"/>
+ </spine>
+ <guide>
+  <reference href="wrap0000.html" title="Cover" type="cover"/>
+ </guide>
+</package>
+--------------------------------------------------------------------------------
+<?xml version="1.0" encoding="utf-8"?>
+<package unique-identifier="id" version="2.0" xmlns="http://www.idpf.org/2007/opf" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:opf="http://www.idpf.org/2007/opf" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+<metadata>
+<dc:rights>Public domain in the USA.</dc:rights>
+<dc:identifier id="id" scheme="URI">http://www.gutenberg.org/2600</dc:identifier>
+<dc:creator file-as="Tolstoy, Leo, graf">graf Leo Tolstoy</dc:creator>
+<dc:contributor file-as="Maude, Aylmer" role="trl">Aylmer Maude</dc:contributor>
+<dc:contributor file-as="Maude, Louise" role="trl">Louise Maude</dc:contributor>
+<dc:title>War and Peace</dc:title>
+<dc:language xsi:type="dcterms:RFC4646">en</dc:language>
+<dc:subject>Historical fiction</dc:subject>
+<dc:subject>War stories</dc:subject>
+<dc:subject>Napoleonic Wars, 1800-1815 -- Campaigns -- Russia -- Fiction</dc:subject>
+<dc:subject>Russia -- History -- Alexander I, 1801-1825 -- Fiction</dc:subject>
+<dc:subject>Aristocracy (Social class) -- Russia -- Fiction</dc:subject>
+<dc:date event="publication">2001-04-01</dc:date>
+<dc:date event="conversion">2020-04-02T07:55:23.696736+00:00</dc:date>
+<dc:source>https://www.gutenberg.org/files/2600/2600-h/2600-h.htm</dc:source>
+<meta content="item1" name="cover"/>
+</metadata>
+<manifest>
+<!--Image: 484 x 700 size=108909 q=90-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@images@cover.jpg" id="item1" media-type="image/jpeg"/>
+<item href="pgepub.css" id="item2" media-type="text/css"/>
+<item href="0.css" id="item3" media-type="text/css"/>
+<item href="1.css" id="item4" media-type="text/css"/>
+<!--Chunk: size=57550 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-0.htm.html" id="item5" media-type="application/xhtml+xml"/>
+<!--Chunk: size=58304 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-1.htm.html" id="item6" media-type="application/xhtml+xml"/>
+<!--Chunk: size=52760 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-2.htm.html" id="item7" media-type="application/xhtml+xml"/>
+<!--Chunk: size=57074 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-3.htm.html" id="item8" media-type="application/xhtml+xml"/>
+<!--Chunk: size=52377 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-4.htm.html" id="item9" media-type="application/xhtml+xml"/>
+<!--Chunk: size=70163 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-5.htm.html" id="item10" media-type="application/xhtml+xml"/>
+<!--Chunk: size=60805 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-6.htm.html" id="item11" media-type="application/xhtml+xml"/>
+<!--Chunk: size=61815 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-7.htm.html" id="item12" media-type="application/xhtml+xml"/>
+<!--Chunk: size=51653 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-8.htm.html" id="item13" media-type="application/xhtml+xml"/>
+<!--Chunk: size=64801 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-9.htm.html" id="item14" media-type="application/xhtml+xml"/>
+<!--Chunk: size=51852 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-10.htm.html" id="item15" media-type="application/xhtml+xml"/>
+<!--Chunk: size=70173 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-11.htm.html" id="item16" media-type="application/xhtml+xml"/>
+<!--Chunk: size=63821 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-12.htm.html" id="item17" media-type="application/xhtml+xml"/>
+<!--Chunk: size=53220 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-13.htm.html" id="item18" media-type="application/xhtml+xml"/>
+<!--Chunk: size=51538 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-14.htm.html" id="item19" media-type="application/xhtml+xml"/>
+<!--Chunk: size=56744 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-15.htm.html" id="item20" media-type="application/xhtml+xml"/>
+<!--Chunk: size=53690 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-16.htm.html" id="item21" media-type="application/xhtml+xml"/>
+<!--Chunk: size=58241 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-17.htm.html" id="item22" media-type="application/xhtml+xml"/>
+<!--Chunk: size=57692 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-18.htm.html" id="item23" media-type="application/xhtml+xml"/>
+<!--Chunk: size=58285 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-19.htm.html" id="item24" media-type="application/xhtml+xml"/>
+<!--Chunk: size=57202 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-20.htm.html" id="item25" media-type="application/xhtml+xml"/>
+<!--Chunk: size=63300 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-21.htm.html" id="item26" media-type="application/xhtml+xml"/>
+<!--Chunk: size=60258 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-22.htm.html" id="item27" media-type="application/xhtml+xml"/>
+<!--Chunk: size=55844 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-23.htm.html" id="item28" media-type="application/xhtml+xml"/>
+<!--Chunk: size=54441 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-24.htm.html" id="item29" media-type="application/xhtml+xml"/>
+<!--Chunk: size=54570 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-25.htm.html" id="item30" media-type="application/xhtml+xml"/>
+<!--Chunk: size=52393 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-26.htm.html" id="item31" media-type="application/xhtml+xml"/>
+<!--Chunk: size=51987 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-27.htm.html" id="item32" media-type="application/xhtml+xml"/>
+<!--Chunk: size=67760 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-28.htm.html" id="item33" media-type="application/xhtml+xml"/>
+<!--Chunk: size=63989 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-29.htm.html" id="item34" media-type="application/xhtml+xml"/>
+<!--Chunk: size=63702 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-30.htm.html" id="item35" media-type="application/xhtml+xml"/>
+<!--Chunk: size=59862 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-31.htm.html" id="item36" media-type="application/xhtml+xml"/>
+<!--Chunk: size=54267 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-32.htm.html" id="item37" media-type="application/xhtml+xml"/>
+<!--Chunk: size=59292 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-33.htm.html" id="item38" media-type="application/xhtml+xml"/>
+<!--Chunk: size=56661 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-34.htm.html" id="item39" media-type="application/xhtml+xml"/>
+<!--Chunk: size=60083 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-35.htm.html" id="item40" media-type="application/xhtml+xml"/>
+<!--Chunk: size=56200 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-36.htm.html" id="item41" media-type="application/xhtml+xml"/>
+<!--Chunk: size=56136 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-37.htm.html" id="item42" media-type="application/xhtml+xml"/>
+<!--Chunk: size=59126 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-38.htm.html" id="item43" media-type="application/xhtml+xml"/>
+<!--Chunk: size=53080 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-39.htm.html" id="item44" media-type="application/xhtml+xml"/>
+<!--Chunk: size=54926 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-40.htm.html" id="item45" media-type="application/xhtml+xml"/>
+<!--Chunk: size=67086 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-41.htm.html" id="item46" media-type="application/xhtml+xml"/>
+<!--Chunk: size=57293 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-42.htm.html" id="item47" media-type="application/xhtml+xml"/>
+<!--Chunk: size=54513 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-43.htm.html" id="item48" media-type="application/xhtml+xml"/>
+<!--Chunk: size=64103 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-44.htm.html" id="item49" media-type="application/xhtml+xml"/>
+<!--Chunk: size=57653 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-45.htm.html" id="item50" media-type="application/xhtml+xml"/>
+<!--Chunk: size=55452 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-46.htm.html" id="item51" media-type="application/xhtml+xml"/>
+<!--Chunk: size=59269 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-47.htm.html" id="item52" media-type="application/xhtml+xml"/>
+<!--Chunk: size=53393 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-48.htm.html" id="item53" media-type="application/xhtml+xml"/>
+<!--Chunk: size=51746 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-49.htm.html" id="item54" media-type="application/xhtml+xml"/>
+<!--Chunk: size=58540 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-50.htm.html" id="item55" media-type="application/xhtml+xml"/>
+<!--Chunk: size=66538 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-51.htm.html" id="item56" media-type="application/xhtml+xml"/>
+<!--Chunk: size=53240 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-52.htm.html" id="item57" media-type="application/xhtml+xml"/>
+<!--Chunk: size=54375 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-53.htm.html" id="item58" media-type="application/xhtml+xml"/>
+<!--Chunk: size=54268 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-54.htm.html" id="item59" media-type="application/xhtml+xml"/>
+<!--Chunk: size=54373 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-55.htm.html" id="item60" media-type="application/xhtml+xml"/>
+<!--Chunk: size=58744 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-56.htm.html" id="item61" media-type="application/xhtml+xml"/>
+<!--Chunk: size=53208 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-57.htm.html" id="item62" media-type="application/xhtml+xml"/>
+<!--Chunk: size=53405 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-58.htm.html" id="item63" media-type="application/xhtml+xml"/>
+<!--Chunk: size=53793 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-59.htm.html" id="item64" media-type="application/xhtml+xml"/>
+<!--Chunk: size=54766 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-60.htm.html" id="item65" media-type="application/xhtml+xml"/>
+<!--Chunk: size=55485 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-61.htm.html" id="item66" media-type="application/xhtml+xml"/>
+<!--Chunk: size=51658 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-62.htm.html" id="item67" media-type="application/xhtml+xml"/>
+<!--Chunk: size=52806 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-63.htm.html" id="item68" media-type="application/xhtml+xml"/>
+<!--Chunk: size=58713 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-64.htm.html" id="item69" media-type="application/xhtml+xml"/>
+<!--Chunk: size=57610 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-65.htm.html" id="item70" media-type="application/xhtml+xml"/>
+<!--Chunk: size=60847 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-66.htm.html" id="item71" media-type="application/xhtml+xml"/>
+<!--Chunk: size=57840 Split on div-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-67.htm.html" id="item72" media-type="application/xhtml+xml"/>
+<!--Chunk: size=68925-->
+<item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-68.htm.html" id="item73" media-type="application/xhtml+xml"/>
+<item href="toc.ncx" id="ncx" media-type="application/x-dtbncx+xml"/>
+<item href="wrap0000.html" id="coverpage-wrapper" media-type="application/xhtml+xml"/>
+</manifest>
+<spine toc="ncx">
+<itemref idref="coverpage-wrapper" linear="yes"/>
+<itemref idref="item5" linear="yes"/>
+<itemref idref="item6" linear="yes"/>
+<itemref idref="item7" linear="yes"/>
+<itemref idref="item8" linear="yes"/>
+<itemref idref="item9" linear="yes"/>
+<itemref idref="item10" linear="yes"/>
+<itemref idref="item11" linear="yes"/>
+<itemref idref="item12" linear="yes"/>
+<itemref idref="item13" linear="yes"/>
+<itemref idref="item14" linear="yes"/>
+<itemref idref="item15" linear="yes"/>
+<itemref idref="item16" linear="yes"/>
+<itemref idref="item17" linear="yes"/>
+<itemref idref="item18" linear="yes"/>
+<itemref idref="item19" linear="yes"/>
+<itemref idref="item20" linear="yes"/>
+<itemref idref="item21" linear="yes"/>
+<itemref idref="item22" linear="yes"/>
+<itemref idref="item23" linear="yes"/>
+<itemref idref="item24" linear="yes"/>
+<itemref idref="item25" linear="yes"/>
+<itemref idref="item26" linear="yes"/>
+<itemref idref="item27" linear="yes"/>
+<itemref idref="item28" linear="yes"/>
+<itemref idref="item29" linear="yes"/>
+<itemref idref="item30" linear="yes"/>
+<itemref idref="item31" linear="yes"/>
+<itemref idref="item32" linear="yes"/>
+<itemref idref="item33" linear="yes"/>
+<itemref idref="item34" linear="yes"/>
+<itemref idref="item35" linear="yes"/>
+<itemref idref="item36" linear="yes"/>
+<itemref idref="item37" linear="yes"/>
+<itemref idref="item38" linear="yes"/>
+<itemref idref="item39" linear="yes"/>
+<itemref idref="item40" linear="yes"/>
+<itemref idref="item41" linear="yes"/>
+<itemref idref="item42" linear="yes"/>
+<itemref idref="item43" linear="yes"/>
+<itemref idref="item44" linear="yes"/>
+<itemref idref="item45" linear="yes"/>
+<itemref idref="item46" linear="yes"/>
+<itemref idref="item47" linear="yes"/>
+<itemref idref="item48" linear="yes"/>
+<itemref idref="item49" linear="yes"/>
+<itemref idref="item50" linear="yes"/>
+<itemref idref="item51" linear="yes"/>
+<itemref idref="item52" linear="yes"/>
+<itemref idref="item53" linear="yes"/>
+<itemref idref="item54" linear="yes"/>
+<itemref idref="item55" linear="yes"/>
+<itemref idref="item56" linear="yes"/>
+<itemref idref="item57" linear="yes"/>
+<itemref idref="item58" linear="yes"/>
+<itemref idref="item59" linear="yes"/>
+<itemref idref="item60" linear="yes"/>
+<itemref idref="item61" linear="yes"/>
+<itemref idref="item62" linear="yes"/>
+<itemref idref="item63" linear="yes"/>
+<itemref idref="item64" linear="yes"/>
+<itemref idref="item65" linear="yes"/>
+<itemref idref="item66" linear="yes"/>
+<itemref idref="item67" linear="yes"/>
+<itemref idref="item68" linear="yes"/>
+<itemref idref="item69" linear="yes"/>
+<itemref idref="item70" linear="yes"/>
+<itemref idref="item71" linear="yes"/>
+<itemref idref="item72" linear="yes"/>
+<itemref idref="item73" linear="yes"/>
+</spine>
+<guide>
+<reference href="wrap0000.html" title="Cover" type="cover"/>
+</guide>
+</package>
+Etree version of parse
+==============================================================================
+<package xmlns:opf="http://www.idpf.org/2007/opf" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.idpf.org/2007/opf" version="2.0" unique-identifier="id">
+  <metadata>
+    <dc:rights>Public domain in the USA.</dc:rights>
+    <dc:identifier opf:scheme="URI" id="id">http://www.gutenberg.org/2600</dc:identifier>
+    <dc:creator opf:file-as="Tolstoy, Leo, graf">graf Leo Tolstoy</dc:creator>
+    <dc:contributor opf:role="trl" opf:file-as="Maude, Aylmer">Aylmer Maude</dc:contributor>
+    <dc:contributor opf:role="trl" opf:file-as="Maude, Louise">Louise Maude</dc:contributor>
+    <dc:title>War and Peace</dc:title>
+    <dc:language xsi:type="dcterms:RFC4646">en</dc:language>
+    <dc:subject>Historical fiction</dc:subject>
+    <dc:subject>War stories</dc:subject>
+    <dc:subject>Napoleonic Wars, 1800-1815 -- Campaigns -- Russia -- Fiction</dc:subject>
+    <dc:subject>Russia -- History -- Alexander I, 1801-1825 -- Fiction</dc:subject>
+    <dc:subject>Aristocracy (Social class) -- Russia -- Fiction</dc:subject>
+    <dc:date opf:event="publication">2001-04-01</dc:date>
+    <dc:date opf:event="conversion">2020-04-02T07:55:23.696736+00:00</dc:date>
+    <dc:source>https://www.gutenberg.org/files/2600/2600-h/2600-h.htm</dc:source>
+    <meta name="cover" content="item1"/>
+  </metadata>
+  <manifest>
+    <!--Image: 484 x 700 size=108909 q=90-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@images@cover.jpg" id="item1" media-type="image/jpeg"/>
+    <item href="pgepub.css" id="item2" media-type="text/css"/>
+    <item href="0.css" id="item3" media-type="text/css"/>
+    <item href="1.css" id="item4" media-type="text/css"/>
+    <!--Chunk: size=57550 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-0.htm.html" id="item5" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=58304 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-1.htm.html" id="item6" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=52760 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-2.htm.html" id="item7" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=57074 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-3.htm.html" id="item8" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=52377 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-4.htm.html" id="item9" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=70163 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-5.htm.html" id="item10" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=60805 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-6.htm.html" id="item11" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=61815 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-7.htm.html" id="item12" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=51653 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-8.htm.html" id="item13" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=64801 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-9.htm.html" id="item14" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=51852 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-10.htm.html" id="item15" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=70173 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-11.htm.html" id="item16" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=63821 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-12.htm.html" id="item17" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=53220 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-13.htm.html" id="item18" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=51538 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-14.htm.html" id="item19" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=56744 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-15.htm.html" id="item20" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=53690 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-16.htm.html" id="item21" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=58241 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-17.htm.html" id="item22" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=57692 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-18.htm.html" id="item23" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=58285 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-19.htm.html" id="item24" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=57202 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-20.htm.html" id="item25" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=63300 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-21.htm.html" id="item26" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=60258 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-22.htm.html" id="item27" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=55844 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-23.htm.html" id="item28" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=54441 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-24.htm.html" id="item29" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=54570 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-25.htm.html" id="item30" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=52393 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-26.htm.html" id="item31" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=51987 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-27.htm.html" id="item32" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=67760 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-28.htm.html" id="item33" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=63989 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-29.htm.html" id="item34" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=63702 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-30.htm.html" id="item35" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=59862 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-31.htm.html" id="item36" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=54267 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-32.htm.html" id="item37" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=59292 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-33.htm.html" id="item38" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=56661 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-34.htm.html" id="item39" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=60083 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-35.htm.html" id="item40" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=56200 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-36.htm.html" id="item41" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=56136 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-37.htm.html" id="item42" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=59126 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-38.htm.html" id="item43" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=53080 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-39.htm.html" id="item44" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=54926 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-40.htm.html" id="item45" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=67086 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-41.htm.html" id="item46" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=57293 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-42.htm.html" id="item47" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=54513 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-43.htm.html" id="item48" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=64103 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-44.htm.html" id="item49" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=57653 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-45.htm.html" id="item50" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=55452 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-46.htm.html" id="item51" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=59269 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-47.htm.html" id="item52" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=53393 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-48.htm.html" id="item53" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=51746 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-49.htm.html" id="item54" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=58540 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-50.htm.html" id="item55" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=66538 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-51.htm.html" id="item56" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=53240 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-52.htm.html" id="item57" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=54375 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-53.htm.html" id="item58" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=54268 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-54.htm.html" id="item59" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=54373 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-55.htm.html" id="item60" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=58744 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-56.htm.html" id="item61" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=53208 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-57.htm.html" id="item62" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=53405 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-58.htm.html" id="item63" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=53793 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-59.htm.html" id="item64" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=54766 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-60.htm.html" id="item65" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=55485 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-61.htm.html" id="item66" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=51658 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-62.htm.html" id="item67" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=52806 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-63.htm.html" id="item68" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=58713 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-64.htm.html" id="item69" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=57610 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-65.htm.html" id="item70" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=60847 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-66.htm.html" id="item71" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=57840 Split on div-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-67.htm.html" id="item72" media-type="application/xhtml+xml"/>
+    <!--Chunk: size=68925-->
+    <item href="@public@vhost@g@gutenberg@html@files@2600@2600-h@2600-h-68.htm.html" id="item73" media-type="application/xhtml+xml"/>
+    <item href="toc.ncx" id="ncx" media-type="application/x-dtbncx+xml"/>
+    <item href="wrap0000.html" id="coverpage-wrapper" media-type="application/xhtml+xml"/>
+  </manifest>
+  <spine toc="ncx">
+    <itemref idref="coverpage-wrapper" linear="yes"/>
+    <itemref idref="item5" linear="yes"/>
+    <itemref idref="item6" linear="yes"/>
+    <itemref idref="item7" linear="yes"/>
+    <itemref idref="item8" linear="yes"/>
+    <itemref idref="item9" linear="yes"/>
+    <itemref idref="item10" linear="yes"/>
+    <itemref idref="item11" linear="yes"/>
+    <itemref idref="item12" linear="yes"/>
+    <itemref idref="item13" linear="yes"/>
+    <itemref idref="item14" linear="yes"/>
+    <itemref idref="item15" linear="yes"/>
+    <itemref idref="item16" linear="yes"/>
+    <itemref idref="item17" linear="yes"/>
+    <itemref idref="item18" linear="yes"/>
+    <itemref idref="item19" linear="yes"/>
+    <itemref idref="item20" linear="yes"/>
+    <itemref idref="item21" linear="yes"/>
+    <itemref idref="item22" linear="yes"/>
+    <itemref idref="item23" linear="yes"/>
+    <itemref idref="item24" linear="yes"/>
+    <itemref idref="item25" linear="yes"/>
+    <itemref idref="item26" linear="yes"/>
+    <itemref idref="item27" linear="yes"/>
+    <itemref idref="item28" linear="yes"/>
+    <itemref idref="item29" linear="yes"/>
+    <itemref idref="item30" linear="yes"/>
+    <itemref idref="item31" linear="yes"/>
+    <itemref idref="item32" linear="yes"/>
+    <itemref idref="item33" linear="yes"/>
+    <itemref idref="item34" linear="yes"/>
+    <itemref idref="item35" linear="yes"/>
+    <itemref idref="item36" linear="yes"/>
+    <itemref idref="item37" linear="yes"/>
+    <itemref idref="item38" linear="yes"/>
+    <itemref idref="item39" linear="yes"/>
+    <itemref idref="item40" linear="yes"/>
+    <itemref idref="item41" linear="yes"/>
+    <itemref idref="item42" linear="yes"/>
+    <itemref idref="item43" linear="yes"/>
+    <itemref idref="item44" linear="yes"/>
+    <itemref idref="item45" linear="yes"/>
+    <itemref idref="item46" linear="yes"/>
+    <itemref idref="item47" linear="yes"/>
+    <itemref idref="item48" linear="yes"/>
+    <itemref idref="item49" linear="yes"/>
+    <itemref idref="item50" linear="yes"/>
+    <itemref idref="item51" linear="yes"/>
+    <itemref idref="item52" linear="yes"/>
+    <itemref idref="item53" linear="yes"/>
+    <itemref idref="item54" linear="yes"/>
+    <itemref idref="item55" linear="yes"/>
+    <itemref idref="item56" linear="yes"/>
+    <itemref idref="item57" linear="yes"/>
+    <itemref idref="item58" linear="yes"/>
+    <itemref idref="item59" linear="yes"/>
+    <itemref idref="item60" linear="yes"/>
+    <itemref idref="item61" linear="yes"/>
+    <itemref idref="item62" linear="yes"/>
+    <itemref idref="item63" linear="yes"/>
+    <itemref idref="item64" linear="yes"/>
+    <itemref idref="item65" linear="yes"/>
+    <itemref idref="item66" linear="yes"/>
+    <itemref idref="item67" linear="yes"/>
+    <itemref idref="item68" linear="yes"/>
+    <itemref idref="item69" linear="yes"/>
+    <itemref idref="item70" linear="yes"/>
+    <itemref idref="item71" linear="yes"/>
+    <itemref idref="item72" linear="yes"/>
+    <itemref idref="item73" linear="yes"/>
+  </spine>
+  <guide>
+    <reference type="cover" title="Cover" href="wrap0000.html"/>
+  </guide>
+</package>
+
diff -Nru beautifulsoup4-4.9.3/PKG-INFO beautifulsoup4-4.10.0/PKG-INFO
--- beautifulsoup4-4.9.3/PKG-INFO	2020-10-03 15:34:16.005351500 +0000
+++ beautifulsoup4-4.10.0/PKG-INFO	2021-09-08 00:13:24.633570700 +0000
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: beautifulsoup4
-Version: 4.9.3
+Version: 4.10.0
 Summary: Screen-scraping library
 Home-page: http://www.crummy.com/software/BeautifulSoup/bs4/
 Author: Leonard Richardson
@@ -62,17 +62,11 @@
         
         # Note on Python 2 sunsetting
         
-        Since 2012, Beautiful Soup has been developed as a Python 2 library
-        which is automatically converted to Python 3 code as necessary. This
-        makes it impossible to take advantage of some features of Python
-        3.
-        
-        For this reason, I plan to discontinue Beautiful Soup's Python 2
-        support at some point after December 31, 2020: one year after the
-        sunset date for Python 2 itself. Beyond that point, new Beautiful Soup
-        development will exclusively target Python 3. Of course, older
-        releases of Beautiful Soup, which support both versions, will continue
-        to be available.
+        Beautiful Soup's support for Python 2 was discontinued on December 31,
+        2020: one year after the sunset date for Python 2 itself. From this
+        point onward, new Beautiful Soup development will exclusively target
+        Python 3. The final release of Beautiful Soup 4 to support Python 2
+        was 4.9.3.
         
         # Supporting the project
         
@@ -102,25 +96,20 @@
         ```
         
         ```
-        $ python -m unittest discover -s bs4
+        $ python3 -m unittest discover -s bs4
         ```
         
-        If you checked out the source tree, you should see a script in the
-        home directory called test-all-versions. This script will run the unit
-        tests under Python 2, then create a temporary Python 3 conversion of
-        the source and run the unit tests again under Python 3.
-        
 Platform: UNKNOWN
 Classifier: Development Status :: 5 - Production/Stable
 Classifier: Intended Audience :: Developers
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Programming Language :: Python
-Classifier: Programming Language :: Python :: 2.7
 Classifier: Programming Language :: Python :: 3
 Classifier: Topic :: Text Processing :: Markup :: HTML
 Classifier: Topic :: Text Processing :: Markup :: XML
 Classifier: Topic :: Text Processing :: Markup :: SGML
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Requires-Python: >3.0.0
 Description-Content-Type: text/markdown
 Provides-Extra: html5lib
 Provides-Extra: lxml
diff -Nru beautifulsoup4-4.9.3/README.md beautifulsoup4-4.10.0/README.md
--- beautifulsoup4-4.9.3/README.md	2020-06-11 20:10:00.000000000 +0000
+++ beautifulsoup4-4.10.0/README.md	2021-09-07 23:46:27.000000000 +0000
@@ -53,17 +53,11 @@
 
 # Note on Python 2 sunsetting
 
-Since 2012, Beautiful Soup has been developed as a Python 2 library
-which is automatically converted to Python 3 code as necessary. This
-makes it impossible to take advantage of some features of Python
-3.
-
-For this reason, I plan to discontinue Beautiful Soup's Python 2
-support at some point after December 31, 2020: one year after the
-sunset date for Python 2 itself. Beyond that point, new Beautiful Soup
-development will exclusively target Python 3. Of course, older
-releases of Beautiful Soup, which support both versions, will continue
-to be available.
+Beautiful Soup's support for Python 2 was discontinued on December 31,
+2020: one year after the sunset date for Python 2 itself. From this
+point onward, new Beautiful Soup development will exclusively target
+Python 3. The final release of Beautiful Soup 4 to support Python 2
+was 4.9.3.
 
 # Supporting the project
 
@@ -93,10 +87,5 @@
 ```
 
 ```
-$ python -m unittest discover -s bs4
+$ python3 -m unittest discover -s bs4
 ```
-
-If you checked out the source tree, you should see a script in the
-home directory called test-all-versions. This script will run the unit
-tests under Python 2, then create a temporary Python 3 conversion of
-the source and run the unit tests again under Python 3.
diff -Nru beautifulsoup4-4.9.3/setup.py beautifulsoup4-4.10.0/setup.py
--- beautifulsoup4-4.9.3/setup.py	2020-10-03 15:31:00.000000000 +0000
+++ beautifulsoup4-4.10.0/setup.py	2021-09-07 23:53:10.000000000 +0000
@@ -4,38 +4,35 @@
 )
 import sys
 
+from bs4 import __version__
+
 with open("README.md", "r") as fh:
     long_description = fh.read()
 
 setup(
     name="beautifulsoup4",
-    # NOTE: We can't import __version__ from bs4 because bs4/__init__.py is Python 2 code,
-    # and converting it to Python 3 means going through this code to run 2to3.
-    # So we have to specify it twice for the time being.
-    version = '4.9.3',
+    version = __version__,
     author="Leonard Richardson",
     author_email='leonardr@segfault.org',
     url="http://www.crummy.com/software/BeautifulSoup/bs4/",
     download_url = "http://www.crummy.com/software/BeautifulSoup/bs4/download/",
     description="Screen-scraping library",
+    python_requires='>3.0.0',
     install_requires=[
-        "soupsieve >1.2; python_version>='3.0'",
-        "soupsieve >1.2, <2.0; python_version<'3.0'",
+        "soupsieve >1.2",
     ],
     long_description=long_description,
     long_description_content_type="text/markdown",
     license="MIT",
-    packages=find_packages(exclude=['tests*']),
+    packages=find_packages(exclude=['tests*', '*.tests*']),
     extras_require = {
         'lxml' : [ 'lxml'],
         'html5lib' : ['html5lib'],
     },
-    use_2to3 = True,
     classifiers=["Development Status :: 5 - Production/Stable",
                  "Intended Audience :: Developers",
                  "License :: OSI Approved :: MIT License",
                  "Programming Language :: Python",
-                 "Programming Language :: Python :: 2.7",
                  'Programming Language :: Python :: 3',
                  "Topic :: Text Processing :: Markup :: HTML",
                  "Topic :: Text Processing :: Markup :: XML",
diff -Nru beautifulsoup4-4.9.3/test-all-versions beautifulsoup4-4.10.0/test-all-versions
--- beautifulsoup4-4.9.3/test-all-versions	2012-03-30 12:30:56.000000000 +0000
+++ beautifulsoup4-4.10.0/test-all-versions	2021-09-07 23:43:30.000000000 +0000
@@ -1 +1 @@
-python2.7 -m unittest discover -s bs4 && ./convert-py3k
+python3 -m unittest discover -s bs4