diff -Nru python-mechanicalsoup-0.8.0/debian/changelog python-mechanicalsoup-0.10.0/debian/changelog --- python-mechanicalsoup-0.8.0/debian/changelog 2017-10-10 13:27:05.000000000 +0000 +++ python-mechanicalsoup-0.10.0/debian/changelog 2018-02-15 16:18:55.000000000 +0000 @@ -1,3 +1,22 @@ +python-mechanicalsoup (0.10.0-1) unstable; urgency=medium + + [ Ondřej Nový ] + * d/control: Set Vcs-* to salsa.debian.org + + [ Ghislain Antony Vaillant ] + * New upstream version 0.10.0 (Closes: #883366) + * Refresh the patch queue + * Update the copyright years + * Drop the get-orig-source target + * Normalize the package descriptions + * Bump the debhelper version to 11 + * Bump the standards version to 4.1.3 + * Explicitly disable testing at build time. + Reason: Tests require network access + * Add pytest-mock to the autopkgtest Depends + + -- Ghislain Antony Vaillant Thu, 15 Feb 2018 16:18:55 +0000 + python-mechanicalsoup (0.8.0-1) unstable; urgency=medium * Add recommended get-orig-source target diff -Nru python-mechanicalsoup-0.8.0/debian/compat python-mechanicalsoup-0.10.0/debian/compat --- python-mechanicalsoup-0.8.0/debian/compat 2017-10-10 13:27:05.000000000 +0000 +++ python-mechanicalsoup-0.10.0/debian/compat 2018-02-15 16:18:55.000000000 +0000 @@ -1 +1 @@ -10 +11 diff -Nru python-mechanicalsoup-0.8.0/debian/control python-mechanicalsoup-0.10.0/debian/control --- python-mechanicalsoup-0.8.0/debian/control 2017-10-10 13:27:05.000000000 +0000 +++ python-mechanicalsoup-0.10.0/debian/control 2018-02-15 16:18:55.000000000 +0000 @@ -3,7 +3,7 @@ Uploaders: Ghislain Antony Vaillant Section: python Priority: optional -Build-Depends: debhelper (>= 10), +Build-Depends: debhelper (>= 11), dh-python, python-all, python-bs4, @@ -15,9 +15,9 @@ python3-requests (>= 2.0), python3-setuptools, python3-six (>= 1.4) -Standards-Version: 4.1.1 -Vcs-Browser: https://anonscm.debian.org/cgit/python-modules/packages/python-mechanicalsoup.git -Vcs-Git: https://anonscm.debian.org/git/python-modules/packages/python-mechanicalsoup.git +Standards-Version: 4.1.3 +Vcs-Browser: https://salsa.debian.org/python-team/modules/python-mechanicalsoup +Vcs-Git: https://salsa.debian.org/python-team/modules/python-mechanicalsoup.git Homepage: https://github.com/hickford/MechanicalSoup X-Python-Version: >= 2.7 X-Python3-Version: >= 3.4 @@ -34,7 +34,7 @@ MechanicalSoup provides a similar API to the Mechanize library using Requests (for http sessions) and BeautifulSoup (for document navigation). . - This package provides the library for Python 2. + This package provides the modules for Python 2. Package: python3-mechanicalsoup Architecture: all @@ -48,4 +48,4 @@ MechanicalSoup provides a similar API to the Mechanize library using Requests (for http sessions) and BeautifulSoup (for document navigation). . - This package provides the library for Python 3. + This package provides the modules for Python 3. diff -Nru python-mechanicalsoup-0.8.0/debian/copyright python-mechanicalsoup-0.10.0/debian/copyright --- python-mechanicalsoup-0.8.0/debian/copyright 2017-10-10 13:27:05.000000000 +0000 +++ python-mechanicalsoup-0.10.0/debian/copyright 2018-02-15 16:18:55.000000000 +0000 @@ -8,7 +8,7 @@ License: Expat Files: debian/* -Copyright: 2016 Ghislain Antony Vaillant +Copyright: 2016-2018 Ghislain Antony Vaillant License: Expat License: Expat diff -Nru python-mechanicalsoup-0.8.0/debian/patches/No-pytest-runner.patch python-mechanicalsoup-0.10.0/debian/patches/No-pytest-runner.patch --- python-mechanicalsoup-0.8.0/debian/patches/No-pytest-runner.patch 2017-10-10 13:27:05.000000000 +0000 +++ python-mechanicalsoup-0.10.0/debian/patches/No-pytest-runner.patch 2018-02-15 16:18:55.000000000 +0000 @@ -3,20 +3,17 @@ Subject: No pytest-runner --- - setup.py | 3 --- - 1 file changed, 3 deletions(-) + setup.py | 1 - + 1 file changed, 1 deletion(-) diff --git a/setup.py b/setup.py -index 288d4b3..9388e8a 100644 +index c7fe7a4..4971f73 100644 --- a/setup.py +++ b/setup.py -@@ -44,9 +44,6 @@ setup( - 'beautifulsoup4', - 'six >= 1.4' - ], -- setup_requires=[ -- 'pytest-runner', -- ], - tests_require=[ - 'pytest', - 'pytest-cov', +@@ -62,6 +62,5 @@ setup( + # "install_requires" vs pip's requirements files see: + # https://packaging.python.org/en/latest/requirements.html + install_requires=requirements_from_file('requirements.txt'), +- setup_requires=['pytest-runner'], + tests_require=requirements_from_file('tests/requirements.txt'), + ) diff -Nru python-mechanicalsoup-0.8.0/debian/rules python-mechanicalsoup-0.10.0/debian/rules --- python-mechanicalsoup-0.8.0/debian/rules 2017-10-10 13:27:05.000000000 +0000 +++ python-mechanicalsoup-0.10.0/debian/rules 2018-02-15 16:18:55.000000000 +0000 @@ -5,8 +5,8 @@ export PYBUILD_NAME=mechanicalsoup +# Tests require network access. +export PYBUILD_DISABLE=test + %: dh $@ --with python2,python3 --buildsystem=pybuild - -get-orig-source: - uscan --download-current-version --force-download --no-symlink diff -Nru python-mechanicalsoup-0.8.0/debian/tests/control python-mechanicalsoup-0.10.0/debian/tests/control --- python-mechanicalsoup-0.8.0/debian/tests/control 2017-10-10 13:27:05.000000000 +0000 +++ python-mechanicalsoup-0.10.0/debian/tests/control 2018-02-15 16:18:55.000000000 +0000 @@ -8,6 +8,7 @@ Depends: python-all, python-mechanicalsoup, python-pytest, + python-pytest-mock, python-requests-mock Test-Command: set -e @@ -20,4 +21,5 @@ Depends: python3-all, python3-mechanicalsoup, python3-pytest, + python3-pytest-mock, python3-requests-mock diff -Nru python-mechanicalsoup-0.8.0/example_manual.py python-mechanicalsoup-0.10.0/example_manual.py --- python-mechanicalsoup-0.8.0/example_manual.py 2017-05-07 13:55:22.000000000 +0000 +++ python-mechanicalsoup-0.10.0/example_manual.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,44 +0,0 @@ -"""Example app to login to GitHub, using the plain Browser class. - -See example.py for an example using the more advanced StatefulBrowser.""" -import argparse -import mechanicalsoup - -parser = argparse.ArgumentParser(description="Login to GitHub.") -parser.add_argument("username") -parser.add_argument("password") -args = parser.parse_args() - -browser = mechanicalsoup.Browser(soup_config={'features': 'lxml'}) - -# request github login page. the result is a requests.Response object -# http://docs.python-requests.org/en/latest/user/quickstart/#response-content -login_page = browser.get("https://github.com/login") - -# similar to assert login_page.ok but with full status code in case of -# failure. -login_page.raise_for_status() - -# login_page.soup is a BeautifulSoup object -# http://www.crummy.com/software/BeautifulSoup/bs4/doc/#beautifulsoup -# we grab the login form -login_form = mechanicalsoup.Form(login_page.soup.select_one('#login form')) - -# specify username and password -login_form.input({"login": args.username, "password": args.password}) - -# submit form -page2 = browser.submit(login_form, login_page.url) - -# verify we are now logged in -messages = page2.soup.find("div", class_="flash-messages") -if messages: - print(messages.text) -assert page2.soup.select(".logout-form") - -print(page2.soup.title.text) - -# verify we remain logged in (thanks to cookies) as we browse the rest of -# the site -page3 = browser.get("https://github.com/hickford/MechanicalSoup") -assert page3.soup.select(".logout-form") diff -Nru python-mechanicalsoup-0.8.0/example.py python-mechanicalsoup-0.10.0/example.py --- python-mechanicalsoup-0.8.0/example.py 2017-06-17 15:33:23.000000000 +0000 +++ python-mechanicalsoup-0.10.0/example.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,43 +0,0 @@ -"""Example app to login to GitHub using the StatefulBrowser class.""" - -from __future__ import print_function -import argparse -import mechanicalsoup -from getpass import getpass - -parser = argparse.ArgumentParser(description="Login to GitHub.") -parser.add_argument("username") -args = parser.parse_args() - -args.password = getpass("Please enter your GitHub password: ") - -browser = mechanicalsoup.StatefulBrowser( - soup_config={'features': 'lxml'}, - raise_on_404=True -) -# Uncomment for a more verbose output: -# browser.set_verbose(2) - -browser.open("https://github.com") -browser.follow_link("login") -browser.select_form('#login form') -browser["login"] = args.username -browser["password"] = args.password -resp = browser.submit_selected() - -# Uncomment to launch a web browser on the current page: -# browser.launch_browser() - -# verify we are now logged in -page = browser.get_current_page() -messages = page.find("div", class_="flash-messages") -if messages: - print(messages.text) -assert page.select(".logout-form") - -print(page.title.text) - -# verify we remain logged in (thanks to cookies) as we browse the rest of -# the site -page3 = browser.open("https://github.com/hickford/MechanicalSoup") -assert page3.soup.select(".logout-form") diff -Nru python-mechanicalsoup-0.8.0/examples/example_manual.py python-mechanicalsoup-0.10.0/examples/example_manual.py --- python-mechanicalsoup-0.8.0/examples/example_manual.py 1970-01-01 00:00:00.000000000 +0000 +++ python-mechanicalsoup-0.10.0/examples/example_manual.py 2018-02-04 00:51:13.000000000 +0000 @@ -0,0 +1,44 @@ +"""Example app to login to GitHub, using the plain Browser class. + +See example.py for an example using the more advanced StatefulBrowser.""" +import argparse +import mechanicalsoup + +parser = argparse.ArgumentParser(description="Login to GitHub.") +parser.add_argument("username") +parser.add_argument("password") +args = parser.parse_args() + +browser = mechanicalsoup.Browser(soup_config={'features': 'lxml'}) + +# request github login page. the result is a requests.Response object +# http://docs.python-requests.org/en/latest/user/quickstart/#response-content +login_page = browser.get("https://github.com/login") + +# similar to assert login_page.ok but with full status code in case of +# failure. +login_page.raise_for_status() + +# login_page.soup is a BeautifulSoup object +# http://www.crummy.com/software/BeautifulSoup/bs4/doc/#beautifulsoup +# we grab the login form +login_form = mechanicalsoup.Form(login_page.soup.select_one('#login form')) + +# specify username and password +login_form.input({"login": args.username, "password": args.password}) + +# submit form +page2 = browser.submit(login_form, login_page.url) + +# verify we are now logged in +messages = page2.soup.find("div", class_="flash-messages") +if messages: + print(messages.text) +assert page2.soup.select(".logout-form") + +print(page2.soup.title.text) + +# verify we remain logged in (thanks to cookies) as we browse the rest of +# the site +page3 = browser.get("https://github.com/MechanicalSoup/MechanicalSoup") +assert page3.soup.select(".logout-form") diff -Nru python-mechanicalsoup-0.8.0/examples/example.py python-mechanicalsoup-0.10.0/examples/example.py --- python-mechanicalsoup-0.8.0/examples/example.py 1970-01-01 00:00:00.000000000 +0000 +++ python-mechanicalsoup-0.10.0/examples/example.py 2018-02-04 00:51:13.000000000 +0000 @@ -0,0 +1,44 @@ +"""Example app to login to GitHub using the StatefulBrowser class.""" + +from __future__ import print_function +import argparse +import mechanicalsoup +from getpass import getpass + +parser = argparse.ArgumentParser(description="Login to GitHub.") +parser.add_argument("username") +args = parser.parse_args() + +args.password = getpass("Please enter your GitHub password: ") + +browser = mechanicalsoup.StatefulBrowser( + soup_config={'features': 'lxml'}, + raise_on_404=True, + user_agent='MyBot/0.1: mysite.example.com/bot_info', +) +# Uncomment for a more verbose output: +# browser.set_verbose(2) + +browser.open("https://github.com") +browser.follow_link("login") +browser.select_form('#login form') +browser["login"] = args.username +browser["password"] = args.password +resp = browser.submit_selected() + +# Uncomment to launch a web browser on the current page: +# browser.launch_browser() + +# verify we are now logged in +page = browser.get_current_page() +messages = page.find("div", class_="flash-messages") +if messages: + print(messages.text) +assert page.select(".logout-form") + +print(page.title.text) + +# verify we remain logged in (thanks to cookies) as we browse the rest of +# the site +page3 = browser.open("https://github.com/MechanicalSoup/MechanicalSoup") +assert page3.soup.select(".logout-form") diff -Nru python-mechanicalsoup-0.8.0/MANIFEST.in python-mechanicalsoup-0.10.0/MANIFEST.in --- python-mechanicalsoup-0.8.0/MANIFEST.in 2017-05-07 13:55:22.000000000 +0000 +++ python-mechanicalsoup-0.10.0/MANIFEST.in 2018-02-04 00:51:13.000000000 +0000 @@ -1,3 +1,4 @@ -include LICENSE README.md +include LICENSE README.rst recursive-include tests *.py -include example*.py +include examples/example*.py +include requirements.txt tests/requirements.txt diff -Nru python-mechanicalsoup-0.8.0/mechanicalsoup/browser.py python-mechanicalsoup-0.10.0/mechanicalsoup/browser.py --- python-mechanicalsoup-0.8.0/mechanicalsoup/browser.py 2017-09-18 16:52:14.000000000 +0000 +++ python-mechanicalsoup-0.10.0/mechanicalsoup/browser.py 2018-02-04 00:51:13.000000000 +0000 @@ -1,4 +1,3 @@ -import warnings import requests import bs4 from six.moves import urllib @@ -8,20 +7,44 @@ import tempfile from .utils import LinkNotFoundError from .__version__ import __version__, __title__ - -# see -# https://www.crummy.com/software/BeautifulSoup/bs4/doc/#specifying-the-parser-to-use -warnings.filterwarnings( - "ignore", "No parser was explicitly specified", module="bs4") +import weakref class Browser(object): + """Builds a Browser. - def __init__(self, session=None, soup_config=None, requests_adapters=None, + :param session: Attach a pre-existing requests Session instead of + constructing a new one. + :param soup_config: Configuration passed to BeautifulSoup to affect + the way HTML is parsed. Defaults to ``{'features': 'lxml'}``. + If overriden, it is highly recommended to `specify a parser + `__. + Otherwise, BeautifulSoup will issue a warning and pick one for + you, but the parser it chooses may be different on different + machines. + :param requests_adapters: Configuration passed to requests, to affect + the way HTTP requests are performed. + :param raise_on_404: If True, raise :class:`LinkNotFoundError` + when visiting a page triggers a 404 Not Found error. + :param user_agent: Set the user agent header to this value. + + See also: :func:`StatefulBrowser` + + """ + def __init__(self, session=None, soup_config={'features': 'lxml'}, + requests_adapters=None, raise_on_404=False, user_agent=None): - self.__raise_on_404 = raise_on_404 + + self.raise_on_404 = raise_on_404 self.session = session or requests.Session() + if hasattr(weakref, 'finalize'): + self._finalize = weakref.finalize(self.session, self.close) + else: # pragma: no cover + # Python < 3 does not have weakref.finalize, but these + # versions accept calling session.close() within __del__ + self._finalize = self.close + self.set_user_agent(user_agent) if requests_adapters is not None: @@ -32,14 +55,21 @@ @staticmethod def add_soup(response, soup_config): + """Attaches a soup object to a requests response.""" if "text/html" in response.headers.get("Content-Type", ""): - response.soup = bs4.BeautifulSoup( - response.content, **soup_config) + response.soup = bs4.BeautifulSoup(response.content, **soup_config) + else: + response.soup = None def set_cookiejar(self, cookiejar): """Replaces the current cookiejar in the requests session. Since the session handles cookies automatically without calling this function, - only use this when default cookie handling is insufficient.""" + only use this when default cookie handling is insufficient. + + :param cookiejar: Any `cookielib.CookieJar + `__ + compatible object. + """ self.session.cookies = cookiejar def get_cookiejar(self): @@ -47,37 +77,59 @@ return self.session.cookies def set_user_agent(self, user_agent): + """Replaces the current user agent in the requests session headers.""" # set a default user_agent if not specified if user_agent is None: - try: - requests_ua = requests.utils.default_user_agent() - except AttributeError: - user_agent = '%s/%s' % (__title__, __version__) - else: - user_agent = '%s (%s/%s)' % ( - requests_ua, __title__, __version__) + requests_ua = requests.utils.default_user_agent() + user_agent = '%s (%s/%s)' % (requests_ua, __title__, __version__) # the requests module uses a case-insensitive dict for session headers self.session.headers['User-agent'] = user_agent def request(self, *args, **kwargs): + """Straightforward wrapper around `requests.Session.request + `__. + + :return: `requests.Response + `__ + object with a *soup*-attribute added by :func:`add_soup`. + + This is a low-level function that should not be called for + basic usage (use :func:`get` or :func:`post` instead). Use it if you + need an HTTP verb that MechanicalSoup doesn't manage (e.g. MKCOL) for + example. + """ response = self.session.request(*args, **kwargs) Browser.add_soup(response, self.soup_config) return response def get(self, *args, **kwargs): + """Straightforward wrapper around `requests.Session.get + `__. + + :return: `requests.Response + `__ + object with a *soup*-attribute added by :func:`add_soup`. + """ response = self.session.get(*args, **kwargs) - if self.__raise_on_404 and response.status_code == 404: + if self.raise_on_404 and response.status_code == 404: raise LinkNotFoundError() Browser.add_soup(response, self.soup_config) return response def post(self, *args, **kwargs): + """Straightforward wrapper around `requests.Session.post + `__. + + :return: `requests.Response + `__ + object with a *soup*-attribute added by :func:`add_soup`. + """ response = self.session.post(*args, **kwargs) Browser.add_soup(response, self.soup_config) return response - def _build_request(self, form, url=None, **kwargs): + def _request(self, form, url=None, **kwargs): method = str(form.get("method", "get")) action = form.get("action") url = urllib.parse.urljoin(url, action) @@ -141,30 +193,48 @@ else: kwargs["data"] = data - return requests.Request(method, url, files=files, **kwargs) - - def _prepare_request(self, form, url=None, **kwargs): - request = self._build_request(form, url, **kwargs) - return self.session.prepare_request(request) + return self.session.request(method, url, files=files, **kwargs) def submit(self, form, url=None, **kwargs): + """Prepares and sends a form request. + + :param form: The filled-out form. + :param url: URL of the page the form is on. If the form action is a + relative path, then this must be specified. + :param \*\*kwargs: Arguments forwarded to `requests.Session.request + `__. + + :return: `requests.Response + `__ + object with a *soup*-attribute added by :func:`add_soup`. + """ if isinstance(form, Form): form = form.form - request = self._prepare_request(form, url, **kwargs) - response = self.session.send(request) + response = self._request(form, url, **kwargs) Browser.add_soup(response, self.soup_config) return response def launch_browser(self, soup): - """Launch a browser on the page, for debugging purpose.""" + """Launch a browser to display a page, for debugging purposes. + + :param: soup: Page contents to display, supplied as a bs4 soup object. + """ with tempfile.NamedTemporaryFile(delete=False) as file: file.write(soup.encode()) webbrowser.open('file://' + file.name) def close(self): - """Close the current session""" - self.session.cookies.clear() - self.session.close() + """Close the current session, if still open.""" + if self.session is not None: + self.session.cookies.clear() + self.session.close() + self.session = None def __del__(self): + self._finalize() + + def __enter__(self): + return self + + def __exit__(self, *args): self.close() diff -Nru python-mechanicalsoup-0.8.0/mechanicalsoup/form.py python-mechanicalsoup-0.10.0/mechanicalsoup/form.py --- python-mechanicalsoup-0.8.0/mechanicalsoup/form.py 2017-10-01 12:15:16.000000000 +0000 +++ python-mechanicalsoup-0.10.0/mechanicalsoup/form.py 2018-02-04 00:51:13.000000000 +0000 @@ -1,105 +1,279 @@ +from __future__ import print_function +import copy from .utils import LinkNotFoundError from bs4 import BeautifulSoup +class InvalidFormMethod(LinkNotFoundError): + """This exception is raised when a method of :class:`Form` is used + for an HTML element that is of the wrong type (or is malformed). + It is caught within :func:`Form.set` to perform element type deduction. + + It is derived from :class:`LinkNotFoundError` so that a single base class + can be used to catch all exceptions specific to this module. + """ + pass + + class Form(object): + """Build a fillable form. + + :param form: A bs4.element.Tag corresponding to an HTML form element. + + The Form class is responsible for preparing HTML forms for submission. + It handles the following types of elements: + input (text, checkbox, radio), select, and textarea. + + Each type is set by a method named after the type (e.g. + :func:`~Form.set_select`), and then there are convenience methods (e.g. + :func:`~Form.set`) that do type-deduction and set the value using the + appropriate method. + + It also handles submit-type elements using :func:`~Form.choose_submit`. + """ def __init__(self, form): self.form = form - def input(self, data): + # Aliases for backwards compatibility + # (Included specifically in __init__ to suppress them in Sphinx docs) + self.attach = self.set_input + self.input = self.set_input + self.textarea = self.set_textarea + + def set_input(self, data): + """Fill-in a set of fields in a form. + + Example: filling-in a login/password form + + .. code-block:: python + + form.set_input({"login": username, "password": password}) + + This will find the input element named "login" and give it the + value ``username``, and the input element named "password" and + give it the value ``password``. + """ + for (name, value) in data.items(): i = self.form.find("input", {"name": name}) if not i: - raise LinkNotFoundError("No input field named " + name) + raise InvalidFormMethod("No input field named " + name) i["value"] = value - attach = input - def uncheck_all(self, name): + """Remove the *checked*-attribute of all input elements with + a *name*-attribute given by ``name``. + """ for option in self.form.find_all("input", {"name": name}): if "checked" in option.attrs: del option.attrs["checked"] def check(self, data): + """For backwards compatibility, this method handles checkboxes + and radio buttons in a single call. It will not uncheck any + checkboxes unless explicitly specified by ``data``, in contrast + with the default behavior of :func:`~Form.set_checkbox`. + """ for (name, value) in data.items(): - # Complain if we don't find the name, regardless of the - # value - inputs = self.form.find_all("input", {"name": name}) - if inputs == []: - raise LinkNotFoundError("No input checkbox named " + name) - type = inputs[0].attrs.get('type', 'text') - if type == "radio": + try: + self.set_checkbox({name: value}, uncheck_other_boxes=False) + continue + except InvalidFormMethod: + pass + try: + self.set_radio({name: value}) + continue + except InvalidFormMethod: + pass + raise LinkNotFoundError("No input checkbox/radio named " + name) + + def set_checkbox(self, data, uncheck_other_boxes=True): + """Set the *checked*-attribute of input elements of type "checkbox" + specified by ``data`` (i.e. check boxes). + + :param data: Dict of ``{name: value, ...}``. + In the family of checkboxes whose *name*-attribute is ``name``, + check the box whose *value*-attribute is ``value``. All boxes in + the family can be checked (unchecked) if ``value`` is True (False). + To check multiple specific boxes, let ``value`` be a tuple or list. + :param uncheck_other_boxes: If True (default), before checking any + boxes specified by ``data``, uncheck the entire checkbox family. + Consider setting to False if some boxes are checked by default when + the HTML is served. + """ + for (name, value) in data.items(): + checkboxes = self.form.find_all("input", {"name": name}, + type="checkbox") + if not checkboxes: + raise InvalidFormMethod("No input checkbox named " + name) + + # uncheck if requested + if uncheck_other_boxes: self.uncheck_all(name) - # Accept individual values (int, str) - # We just wrap them in a 1-value tuple. + # Wrap individual values (e.g. int, str) in a 1-element tuple. if not isinstance(value, list) and not isinstance(value, tuple): value = (value,) + + # Check or uncheck one or more boxes for choice in value: - choice = str(choice) # Allow for example literal numbers - found = False - for i in inputs: - if i.attrs.get("value", "on") == choice: - i["checked"] = "" - found = True + choice_str = str(choice) # Allow for example literal numbers + for checkbox in checkboxes: + if checkbox.attrs.get("value", "on") == choice_str: + checkbox["checked"] = "" + break + # Allow specifying True or False to check/uncheck + elif choice is True: + checkbox["checked"] = "" + break + elif choice is False: + if "checked" in checkbox.attrs: + del checkbox.attrs["checked"] break - if not found: - print(self.form) + else: raise LinkNotFoundError( "No input checkbox named %s with choice %s" % (name, choice) - ) + ) - def textarea(self, data): + def set_radio(self, data): + """Set the *checked*-attribute of input elements of type "radio" + specified by ``data`` (i.e. select radio buttons). + + :param data: Dict of ``{name: value, ...}``. + In the family of radio buttons whose *name*-attribute is ``name``, + check the radio button whose *value*-attribute is ``value``. + Only one radio button in the family can be checked. + """ + for (name, value) in data.items(): + radios = self.form.find_all("input", {"name": name}, type="radio") + if not radios: + raise InvalidFormMethod("No input radio named " + name) + + # only one radio button can be checked + self.uncheck_all(name) + + # Check the appropriate radio button (value cannot be a list/tuple) + for radio in radios: + if radio.attrs.get("value", "on") == str(value): + radio["checked"] = "" + break + else: + raise LinkNotFoundError( + "No input radio named %s with choice %s" % (name, value) + ) + + def set_textarea(self, data): + """Set the *string*-attribute of the first textarea element + specified by ``data`` (i.e. set the text of a textarea). + + :param data: Dict of ``{name: value, ...}``. + The textarea whose *name*-attribute is ``name`` will have + its *string*-attribute set to ``value``. + """ for (name, value) in data.items(): t = self.form.find("textarea", {"name": name}) if not t: - raise LinkNotFoundError("No textarea named " + name) + raise InvalidFormMethod("No textarea named " + name) t.string = value - def __setitem__(self, name, value): - return self.set(name, value) + def set_select(self, data): + """Set the *selected*-attribute of the first option element + specified by ``data`` (i.e. select an option from a dropdown). + + :param data: Dict of ``{name: value, ...}``. + Find the select element whose *name*-attribute is ``name``. + Then select from among its children the option element whose + *value*-attribute is ``value``. If the select element's + *multiple*-attribute is set, then ``value`` can be a list + or tuple to select multiple options. + """ + for (name, value) in data.items(): + select = self.form.find("select", {"name": name}) + if not select: + raise InvalidFormMethod("No select named " + name) - def set(self, name, value, force=False): - input = self.form.find("input", {"name": name}) - if input: - if input.attrs.get('type', 'text') in ("radio", "checkbox"): - if value is True: - # f["foo"] = True checks the box foo - input.attrs["checked"] = "" - else: - self.check({name: value}) - else: - input["value"] = value - return - textarea = self.form.find("textarea", {"name": name}) - if textarea: - textarea.string = value - return - select = self.form.find("select", {"name": name}) - if select: + # Deselect all options first for option in select.find_all("option"): if "selected" in option.attrs: del option.attrs["selected"] - o = select.find("option", {"value": value}) - o.attrs["selected"] = "selected" - return + + # Wrap individual values in a 1-element tuple. + # If value is a list/tuple, select must be a ``) will be + added using :func:`~Form.new_control`. + + Example: filling-in a login/password form with EULA checkbox + + .. code-block:: python + + form.set("login", username) + form.set("password", password) + form.set("eula-checkbox", True) + + Example: uploading a file through a ```` field (provide the path to the local file, + and its content will be uploaded): + + .. code-block:: python + + form.set("tagname") = path_to_local_file + + """ + for func in ("checkbox", "radio", "input", "textarea", "select"): + try: + getattr(self, "set_" + func)({name: value}) + return + except InvalidFormMethod: + pass if force: - self.new_control('input', name, value=value) + self.new_control('text', name, value=value) return - raise LinkNotFoundError() + raise LinkNotFoundError("No valid element named " + name) def new_control(self, type, name, value, **kwargs): - old = self.form.find('input', {'name': name}) - if old: + """Add a new input element to the form. + + The arguments set the attributes of the new element. + """ + old_input = self.form.find_all('input', {'name': name}) + for old in old_input: old.decompose() - old = self.form.find('textarea', {'name': name}) - if old: + old_textarea = self.form.find_all('textarea', {'name': name}) + for old in old_textarea: old.decompose() - # We don't have access to the original soup object, so we - # instantiate a new BeautifulSoup() to call new_tag(). - control = BeautifulSoup().new_tag('input') + # We don't have access to the original soup object (just the + # Tag), so we instantiate a new BeautifulSoup() to call + # new_tag(). We're only building the soup object, not parsing + # anything, so the parser doesn't matter. Specify the one + # included in Python to avoid having dependency issue. + control = BeautifulSoup("", "html.parser").new_tag('input') control['type'] = type control['name'] = name control['value'] = value @@ -108,30 +282,36 @@ self.form.append(control) return control - def choose_submit(self, el): - '''Selects the submit input (or button) element specified by 'el', - where 'el' can be either a bs4.element.Tag or just its name attribute. + def choose_submit(self, submit): + """Selects the input (or button) element to use for form submission. + + :param submit: The bs4.element.Tag (or just its *name*-attribute) that + identifies the submit element to use. + + To simulate a normal web browser, only one submit element must be + sent. Therefore, this does not need to be called if there is only + one submit element in the form. + If the element is not found or if multiple elements match, raise a - LinkNotFoundError exception.''' - # In a normal web browser, when a input[type=submit] is clicked, - # all other submits aren't sent. You can use simulate this as - # following: - - # page = browser.get(URL) - # form_el = page.soup.form - # form = Form(form_el) - # submit = page.soup.select(SUBMIT_SELECTOR)[0] - # form.choose_submit(submit) - # url = BASE_DOMAIN + form_el.attrs['action'] - # return browser.submit(form, url) + :class:`LinkNotFoundError` exception. + + Example: :: + + browser = mechanicalsoup.StatefulBrowser() + browser.open(url) + form = browser.select_form() + form.choose_submit('form_name_attr') + browser.submit_selected() + """ found = False inps = self.form.select('input[type="submit"], button[type="submit"]') for inp in inps: - if inp == el or inp['name'] == el: + if inp == submit or (inp.has_attr('name') and + inp['name'] == submit): if found: raise LinkNotFoundError( - "Multiple submit elements match: {0}".format(el) + "Multiple submit elements match: {0}".format(submit) ) found = True continue @@ -140,5 +320,20 @@ if not found: raise LinkNotFoundError( - "Specified submit element not found: {0}".format(el) + "Specified submit element not found: {0}".format(submit) ) + + def print_summary(self): + """Print a summary of the form. + + May help finding which fields need to be filled-in. + """ + for input in self.form.find_all( + ("input", "textarea", "select")): + input_copy = copy.copy(input) + # Text between the opening tag and the closing tag often + # contains a lot of spaces that we don't want here. + for subtag in input_copy.find_all() + [input_copy]: + if subtag.string: + subtag.string = subtag.string.strip() + print(input_copy) diff -Nru python-mechanicalsoup-0.8.0/mechanicalsoup/__init__.py python-mechanicalsoup-0.10.0/mechanicalsoup/__init__.py --- python-mechanicalsoup-0.8.0/mechanicalsoup/__init__.py 2017-08-17 15:13:54.000000000 +0000 +++ python-mechanicalsoup-0.10.0/mechanicalsoup/__init__.py 2018-02-04 00:51:13.000000000 +0000 @@ -1,8 +1,8 @@ from .utils import LinkNotFoundError from .browser import Browser -from .form import Form +from .form import Form, InvalidFormMethod from .stateful_browser import StatefulBrowser from .__version__ import __version__ -__all__ = ['LinkNotFoundError', 'Browser', 'StatefulBrowser', 'Form', - '__version__'] +__all__ = ['StatefulBrowser', 'LinkNotFoundError', 'Browser', 'Form', + 'InvalidFormMethod', '__version__'] diff -Nru python-mechanicalsoup-0.8.0/mechanicalsoup/stateful_browser.py python-mechanicalsoup-0.10.0/mechanicalsoup/stateful_browser.py --- python-mechanicalsoup-0.8.0/mechanicalsoup/stateful_browser.py 2017-10-01 12:15:16.000000000 +0000 +++ python-mechanicalsoup-0.10.0/mechanicalsoup/stateful_browser.py 2018-02-04 00:51:13.000000000 +0000 @@ -9,16 +9,56 @@ import bs4 +class _BrowserState: + def __init__(self, page=None, url=None, form=None, request=None): + self.page = page + self.url = url + self.form = form + self.request = request + + class StatefulBrowser(Browser): - def __init__(self, session=None, soup_config=None, requests_adapters=None, - *args, **kwargs): - super(StatefulBrowser, self).__init__( - session, soup_config, requests_adapters, *args, **kwargs) + """An extension of :class:`Browser` that stores the browser's state + and provides many convenient functions for interacting with HTML elements. + It is the primary tool in MechanicalSoup for interfacing with websites. + + :param session: Attach a pre-existing requests Session instead of + constructing a new one. + :param soup_config: Configuration passed to BeautifulSoup to affect + the way HTML is parsed. Defaults to ``{'features': 'lxml'}``. + If overriden, it is highly recommended to `specify a parser + `__. + Otherwise, BeautifulSoup will issue a warning and pick one for + you, but the parser it chooses may be different on different + machines. + :param requests_adapters: Configuration passed to requests, to affect + the way HTTP requests are performed. + :param raise_on_404: If True, raise :class:`LinkNotFoundError` + when visiting a page triggers a 404 Not Found error. + :param user_agent: Set the user agent header to this value. + + All arguments are forwarded to :func:`Browser`. + + Examples :: + + browser = mechanicalsoup.StatefulBrowser( + soup_config={'features': 'lxml'}, # Use the lxml HTML parser + raise_on_404=True, + user_agent='MyBot/0.1: mysite.example.com/bot_info', + ) + browser.open(url) + # ... + browser.close() + + Once not used anymore, the browser can be closed + using :func:`~Browser.close`. + """ + + def __init__(self, *args, **kwargs): + super(StatefulBrowser, self).__init__(*args, **kwargs) self.__debug = False self.__verbose = 0 - self.__current_page = None - self.__current_url = None - self.__current_form = None + self.__state = _BrowserState() def set_debug(self, debug): """Set the debug mode (off by default). @@ -37,38 +77,53 @@ """Set the verbosity level (an integer). * 0 means no verbose output. - * 1 shows one dot per visited page (looks like a progress bar) - - * >= 1 shows each visited URL.""" + * >= 1 shows each visited URL. + """ self.__verbose = verbose + def get_verbose(self): + """Get the verbosity level. See :func:`set_verbose()`.""" + return self.__verbose + def get_url(self): """Get the URL of the currently visited page.""" - return self.__current_url + return self.__state.url def get_current_form(self): - """Get the currently selected form. See select_form().""" - return self.__current_form + """Get the currently selected form as a :class:`Form` object. + See :func:`select_form`. + """ + return self.__state.form def __setitem__(self, name, value): - """Call item assignment on the currently selected form.""" + """Call item assignment on the currently selected form. + See :func:`Form.__setitem__`. + """ self.get_current_form()[name] = value def new_control(self, type, name, value, **kwargs): - """Call new_control() on the currently selected form.""" + """Call :func:`Form.new_control` on the currently selected form.""" return self.get_current_form().new_control(type, name, value, **kwargs) def get_current_page(self): """Get the current page as a soup object.""" - return self.__current_page + return self.__state.page def absolute_url(self, url): - """Make url absolute. url can be either relative or absolute.""" - return urllib.parse.urljoin(self.__current_url, url) + """Return the absolute URL made from the current URL and ``url``. + The current URL is only used to provide any missing components of + ``url``, as in the `.urljoin() method of urllib.parse + `__. + """ + return urllib.parse.urljoin(self.get_url(), url) def open(self, url, *args, **kwargs): - """Open the URL in this Browser object.""" + """Open the URL and store the Browser's state in this object. + All arguments are forwarded to :func:`Browser.get`. + + :return: Forwarded from :func:`Browser.get`. + """ if self.__verbose == 1: sys.stdout.write('.') sys.stdout.flush() @@ -76,62 +131,126 @@ print(url) resp = self.get(url, *args, **kwargs) - if hasattr(resp, 'soup'): - self.__current_page = resp.soup - self.__current_url = resp.url - self.__current_form = None + self.__state = _BrowserState(page=resp.soup, url=resp.url, + request=resp.request) return resp def open_fake_page(self, page_text, url=None, soup_config=None): - """Behave as if opening a page whose text is page_text, but do not - perform any network access. If url is set, pretend the page's URL - is url. Useful mainly for testing.""" - soup_config = soup_config or dict() - self.__current_page = bs4.BeautifulSoup( - page_text, **soup_config) - self.__current_url = url - self.__current_form = None + """Mock version of :func:`open`. + + Behave as if opening a page whose text is ``page_text``, but do not + perform any network access. If ``url`` is set, pretend it is the page's + URL. Useful mainly for testing. + """ + soup_config = soup_config or self.soup_config + self.__state = _BrowserState( + page=bs4.BeautifulSoup(page_text, **soup_config), + url=url) def open_relative(self, url, *args, **kwargs): - """Like open, but URL can be relative to the currently visited page.""" + """Like :func:`open`, but ``url`` can be relative to the currently + visited page. + """ return self.open(self.absolute_url(url), *args, **kwargs) - def select_form(self, *args, **kwargs): - """Select a form in the current page. Arguments are the same - as the select() method for a soup object.""" - found_forms = self.__current_page.select(*args, **kwargs) - if len(found_forms) < 1: - if self.__debug: - print('select_form failed for', *args) - self.launch_browser() - raise LinkNotFoundError() + def refresh(self): + """Reload the current page with the same request as originally done. + Any change (`select_form`, or any value filled-in in the form) made to + the current page before refresh is discarded. + + :raise ValueError: Raised if no refreshable page is loaded, e.g., when + using the shallow ``Browser`` wrapper functions. + + :return: Response of the request.""" + old_request = self.__state.request + if old_request is None: + raise ValueError('The current page is not refreshable. Either no ' + 'page is opened or low-level browser methods ' + 'were used to do so') + + resp = self.session.send(old_request) + Browser.add_soup(resp, self.soup_config) + self.__state = _BrowserState(page=resp.soup, url=resp.url, + request=resp.request) + return resp + + def select_form(self, selector="form", nr=0): + """Select a form in the current page. + + :param selector: CSS selector or a bs4.element.Tag object to identify + the form to select. + If not specified, ``selector`` defaults to "form", which is + useful if, e.g., there is only one form on the page. + For ``selector`` syntax, see the `.select() method in BeautifulSoup + `__. + :param nr: A zero-based index specifying which form among those that + match ``selector`` will be selected. Useful when one or more forms + have the same attributes as the form you want to select, and its + position on the page is the only way to uniquely identify it. + Default is the first matching form (``nr=0``). + + :return: The selected form as a soup object. It can also be + retrieved later with :func:`get_current_form`. + """ + if isinstance(selector, bs4.element.Tag): + if selector.name != "form": + raise LinkNotFoundError() + self.__state.form = Form(selector) + else: + # nr is a 0-based index for consistency with mechanize + found_forms = self.get_current_page().select(selector, + limit=nr + 1) + if len(found_forms) != nr + 1: + if self.__debug: + print('select_form failed for', selector) + self.launch_browser() + raise LinkNotFoundError() + self.__state.form = Form(found_forms[-1]) - self.__current_form = Form(found_forms[0]) - return self.__current_form + return self.get_current_form() def submit_selected(self, btnName=None, *args, **kwargs): - """Submit the form selected with select_form(). If there are multiple - submit input/button elements, use 'btnName' to choose between them.""" + """Submit the form that was selected with :func:`select_form`. + + :return: Forwarded from :func:`Browser.submit`. + + If there are multiple submit input/button elements, passes ``btnName`` + to :func:`Form.choose_submit` on the current form to choose between + them. All other arguments are forwarded to :func:`Browser.submit`. + """ if btnName is not None: self.get_current_form().choose_submit(btnName) - resp = self.submit(self.__current_form, - url=self.__current_url, + referer = self.get_url() + if referer is not None: + if 'headers' in kwargs: + kwargs['headers']['Referer'] = referer + else: + kwargs['headers'] = {'Referer': referer} + + resp = self.submit(self.__state.form, url=self.__state.url, *args, **kwargs) - self.__current_url = resp.url - if hasattr(resp, "soup"): - self.__current_page = resp.soup - self.__current_form = None + self.__state = _BrowserState(page=resp.soup, url=resp.url, + request=resp.request) return resp def list_links(self, *args, **kwargs): - """Display the list of links in the current page.""" + """Display the list of links in the current page. Arguments are + forwarded to :func:`links`. + """ print("Links in the current page:") for l in self.links(*args, **kwargs): print(" ", l) def links(self, url_regex=None, link_text=None, *args, **kwargs): - """Return links in the page, as a list of bs4.element.Tag object.""" + """Return links in the page, as a list of bs4.element.Tag objects. + + To return links matching specific criteria, specify ``url_regex`` + to match the *href*-attribute, or ``link_text`` to match the + *text*-attribute of the Tag. All other arguments are forwarded to + the `.find_all() method in BeautifulSoup + `__. + """ all_links = self.get_current_page().find_all( 'a', href=True, *args, **kwargs) if url_regex is not None: @@ -142,38 +261,112 @@ if a.text == link_text] return all_links - def find_link(self, url_regex=None, *args, **kwargs): - """Find a link whose href property matches url_regex. + def find_link(self, *args, **kwargs): + """Find and return a link, as a bs4.element.Tag object. - If several links match, return the first one found. + The search can be refined by specifying any argument that is accepted + by :func:`links`. If several links match, return the first one found. - If url_regex is None, return the first link found on the page.""" - links = self.links(url_regex, *args, **kwargs) + If no link is found, raise :class:`LinkNotFoundError`. + """ + links = self.links(*args, **kwargs) if len(links) == 0: raise LinkNotFoundError() else: return links[0] + def _find_link_internal(self, link, args, kwargs): + """Wrapper around find_link that deals with convenience special-cases: + + * If ``link`` has an *href*-attribute, then return it. If not, + consider it as a ``url_regex`` argument. + + * If searching for the link fails and debug is active, launch + a browser. + """ + if hasattr(link, 'attrs') and 'href' in link.attrs: + return link + + # Check if "link" parameter should be treated as "url_regex" + # but reject obtaining it from both places. + if link and 'url_regex' in kwargs: + raise ValueError('link parameter cannot be treated as ' + 'url_regex because url_regex is already ' + 'present in keyword arguments') + else: + kwargs['url_regex'] = link + + try: + return self.find_link(*args, **kwargs) + except LinkNotFoundError: + if self.get_debug(): + print('find_link failed for', kwargs) + self.list_links() + self.launch_browser() + raise + def follow_link(self, link=None, *args, **kwargs): - """Follow a previously found link + """Follow a link. - if the `link` argument doesn't have a 'href' attribute, treat - it as a url_regex and look it up with `find_link` + If ``link`` is a bs4.element.Tag (i.e. from a previous call to + :func:`links` or :func:`find_link`), then follow the link. - If the link is not found, Raise LinkNotFoundError. - Before raising LinkNotFoundError, if debug is activated, list - available links in the page and launch a browser.""" - if not hasattr(link, 'attrs') or 'href' not in link.attrs: - try: - link = self.find_link(link, *args, **kwargs) - except LinkNotFoundError: - if self.get_debug(): - print('follow_link failed for', link) - self.list_links() - self.launch_browser() - raise - return self.open(self.absolute_url(link['href'])) + If ``link`` doesn't have a *href*-attribute or is None, treat + ``link`` as a url_regex and look it up with :func:`find_link`. + Any additional arguments specified are forwarded to this function. + + If the link is not found, raise :class:`LinkNotFoundError`. + Before raising, if debug is activated, list available links in the + page and launch a browser. + + :return: Forwarded from :func:`open_relative`. + """ + link = self._find_link_internal(link, args, kwargs) - def launch_browser(self): - """Launch a browser on the page, for debugging purpose.""" - super(StatefulBrowser, self).launch_browser(self.get_current_page()) + referer = self.get_url() + headers = {'Referer': referer} if referer else None + + return self.open_relative(link['href'], headers=headers) + + def download_link(self, link=None, file=None, *args, **kwargs): + """Downloads the contents of a link to a file. This function behaves + similarly to :func:`follow_link`, but the browser state will + not change when calling this function. + + :param file: Filesystem path where the page contents will be + downloaded. If the file already exists, it will be overwritten. + + Other arguments are the same as :func:`follow_link` (``link`` + can either be a bs4.element.Tag or a URL regex, other + arguments are forwarded to :func:`find_link`). + + :return: `requests.Response + `__ + object. + """ + link = self._find_link_internal(link, args, kwargs) + url = self.absolute_url(link['href']) + + referer = self.get_url() + headers = {'Referer': referer} if referer else None + + response = self.session.get(url, headers=headers) + if self.raise_on_404 and response.status_code == 404: + raise LinkNotFoundError() + + # Save the response content to file + if file is not None: + with open(file, 'wb') as f: + f.write(response.content) + + return response + + def launch_browser(self, soup=None): + """Launch a browser to display a page, for debugging purposes. + + :param: soup: Page contents to display, supplied as a bs4 soup object. + Defaults to the current page of the ``StatefulBrowser`` instance. + """ + if soup is None: + soup = self.get_current_page() + super(StatefulBrowser, self).launch_browser(soup) diff -Nru python-mechanicalsoup-0.8.0/mechanicalsoup/utils.py python-mechanicalsoup-0.10.0/mechanicalsoup/utils.py --- python-mechanicalsoup-0.8.0/mechanicalsoup/utils.py 2017-02-12 08:33:43.000000000 +0000 +++ python-mechanicalsoup-0.10.0/mechanicalsoup/utils.py 2018-02-04 00:51:13.000000000 +0000 @@ -1,2 +1,16 @@ class LinkNotFoundError(BaseException): + """Exception raised when mechanicalsoup fails to find something. + + This happens in situations like (non-exhaustive list): + + * :func:`~mechanicalsoup.StatefulBrowser.find_link` is called, but + no link is found. + + * The browser was configured with raise_on_404=True and a 404 + error is triggered while browsing. + + * The user tried to fill-in a field which doesn't exist in a form + (e.g. browser["name"] = "val" with browser being a + StatefulBrowser). + """ pass diff -Nru python-mechanicalsoup-0.8.0/mechanicalsoup/__version__.py python-mechanicalsoup-0.10.0/mechanicalsoup/__version__.py --- python-mechanicalsoup-0.8.0/mechanicalsoup/__version__.py 2017-10-01 14:58:52.000000000 +0000 +++ python-mechanicalsoup-0.10.0/mechanicalsoup/__version__.py 2018-02-04 00:52:21.000000000 +0000 @@ -1,5 +1,6 @@ __title__ = 'MechanicalSoup' __description__ = 'A Python library for automating interaction with websites' -__url__ = 'https://github.com/hickford/MechanicalSoup' -__version__ = '0.8.0' +__url__ = 'https://mechanicalsoup.readthedocs.io/' +__github_url__ = 'https://github.com/MechanicalSoup/MechanicalSoup' +__version__ = '0.10.0' __license__ = 'MIT' diff -Nru python-mechanicalsoup-0.8.0/MechanicalSoup.egg-info/PKG-INFO python-mechanicalsoup-0.10.0/MechanicalSoup.egg-info/PKG-INFO --- python-mechanicalsoup-0.8.0/MechanicalSoup.egg-info/PKG-INFO 2017-10-04 18:16:51.000000000 +0000 +++ python-mechanicalsoup-0.10.0/MechanicalSoup.egg-info/PKG-INFO 2018-02-04 00:53:20.000000000 +0000 @@ -1,12 +1,139 @@ Metadata-Version: 1.1 Name: MechanicalSoup -Version: 0.8.0 +Version: 0.10.0 Summary: A Python library for automating interaction with websites -Home-page: https://github.com/hickford/MechanicalSoup +Home-page: https://mechanicalsoup.readthedocs.io/ Author: UNKNOWN Author-email: UNKNOWN License: MIT -Description: UNKNOWN +Description: MechanicalSoup + ============== + + Home page + --------- + + https://mechanicalsoup.readthedocs.io/ + + Overview + -------- + + A Python library for automating interaction with websites. + MechanicalSoup automatically stores and sends cookies, follows + redirects, and can follow links and submit forms. It doesn't do + JavaScript. + + MechanicalSoup was created by `M Hickford + `__, who was a fond user of the + `Mechanize `__ library. + Unfortunately, Mechanize is `incompatible with Python 3 + `__ and its development + stalled for several years. MechanicalSoup provides a similar API, built on Python + giants `Requests `__ (for + HTTP sessions) and `BeautifulSoup + `__ (for document + navigation). Since 2017 it is a project actively maintained by a small + team including `@hemberger `__ and `@moy + `__. + + |Gitter Chat| + + Installation + ------------ + + |Latest Version| |Supported Versions| + + PyPy and PyPy3 are also supported (and tested against). + + Download and install the latest released version from `PyPI `__:: + + pip install MechanicalSoup + + Download and install the development version from `GitHub `__:: + + pip install git+https://github.com/MechanicalSoup/MechanicalSoup + + Installing from source (installs the version in the current working directory):: + + python setup.py install + + (In all cases, add ``--user`` to the ``install`` command to + install in the current user's home directory.) + + + Documentation + ------------- + + The full documentation is available on + https://mechanicalsoup.readthedocs.io/. You may want to jump directly to + the `automatically generated API + documentation `__. + + Example + ------- + + From `examples/expl_duck_duck_go.py `__, code to get the results from + a DuckDuckGo search: + + .. code:: python + + """Example usage of MechanicalSoup to get the results from + DuckDuckGo.""" + + import mechanicalsoup + + # Connect to duckduckgo + browser = mechanicalsoup.StatefulBrowser() + browser.open("https://duckduckgo.com/") + + # Fill-in the search form + browser.select_form('#search_form_homepage') + browser["q"] = "MechanicalSoup" + browser.submit_selected() + + # Display the results + for link in browser.get_current_page().select('a.result__a'): + print(link.text, '->', link.attrs['href']) + + More examples are available in `examples/ `__. + + For an example with a more complex form (checkboxes, radio buttons and + textareas), read `tests/test_browser.py `__ + and `tests/test_form.py `__. + + Development + ----------- + + |Build Status| |Coverage Status| + |Requirements Status| |Documentation Status| + |CII Best Practices| + + Instructions for building, testing and contributing to MechanicalSoup: + see `CONTRIBUTING.rst `__. + + Common problems + --------------- + + Read the `FAQ + `__. + + + .. |Latest Version| image:: https://img.shields.io/pypi/v/MechanicalSoup.svg + :target: https://pypi.python.org/pypi/MechanicalSoup/ + .. |Supported Versions| image:: https://img.shields.io/pypi/pyversions/mechanicalsoup.svg + :target: https://pypi.python.org/pypi/MechanicalSoup/ + .. |Build Status| image:: https://travis-ci.org/MechanicalSoup/MechanicalSoup.svg?branch=master + :target: https://travis-ci.org/MechanicalSoup/MechanicalSoup + .. |Coverage Status| image:: https://codecov.io/gh/MechanicalSoup/MechanicalSoup/branch/master/graph/badge.svg + :target: https://codecov.io/gh/MechanicalSoup/MechanicalSoup + .. |Requirements Status| image:: https://requires.io/github/MechanicalSoup/MechanicalSoup/requirements.svg?branch=master + :target: https://requires.io/github/MechanicalSoup/MechanicalSoup/requirements/?branch=master + .. |Documentation Status| image:: https://readthedocs.org/projects/mechanicalsoup/badge/?version=latest + :target: https://mechanicalsoup.readthedocs.io/en/latest/?badge=latest + .. |CII Best Practices| image:: https://bestpractices.coreinfrastructure.org/projects/1334/badge + :target: https://bestpractices.coreinfrastructure.org/projects/1334) + .. |Gitter Chat| image:: https://badges.gitter.im/MechanicalSoup/MechanicalSoup.svg + :target: https://gitter.im/MechanicalSoup/Lobby + Platform: UNKNOWN Classifier: License :: OSI Approved :: MIT License Classifier: Programming Language :: Python :: 2 diff -Nru python-mechanicalsoup-0.8.0/MechanicalSoup.egg-info/requires.txt python-mechanicalsoup-0.10.0/MechanicalSoup.egg-info/requires.txt --- python-mechanicalsoup-0.8.0/MechanicalSoup.egg-info/requires.txt 2017-10-04 18:16:51.000000000 +0000 +++ python-mechanicalsoup-0.10.0/MechanicalSoup.egg-info/requires.txt 2018-02-04 00:53:20.000000000 +0000 @@ -1,3 +1,4 @@ requests >= 2.0 beautifulsoup4 six >= 1.4 +lxml diff -Nru python-mechanicalsoup-0.8.0/MechanicalSoup.egg-info/SOURCES.txt python-mechanicalsoup-0.10.0/MechanicalSoup.egg-info/SOURCES.txt --- python-mechanicalsoup-0.8.0/MechanicalSoup.egg-info/SOURCES.txt 2017-10-04 18:16:53.000000000 +0000 +++ python-mechanicalsoup-0.10.0/MechanicalSoup.egg-info/SOURCES.txt 2018-02-04 00:53:20.000000000 +0000 @@ -1,8 +1,7 @@ LICENSE MANIFEST.in -README.md -example.py -example_manual.py +README.rst +requirements.txt setup.cfg setup.py MechanicalSoup.egg-info/PKG-INFO @@ -10,12 +9,18 @@ MechanicalSoup.egg-info/dependency_links.txt MechanicalSoup.egg-info/requires.txt MechanicalSoup.egg-info/top_level.txt +examples/example.py +examples/example_manual.py mechanicalsoup/__init__.py mechanicalsoup/__version__.py mechanicalsoup/browser.py mechanicalsoup/form.py mechanicalsoup/stateful_browser.py mechanicalsoup/utils.py +tests/conftest.py +tests/requirements.txt +tests/setpath.py tests/test_browser.py tests/test_form.py -tests/test_stateful_browser.py \ No newline at end of file +tests/test_stateful_browser.py +tests/utils.py \ No newline at end of file diff -Nru python-mechanicalsoup-0.8.0/PKG-INFO python-mechanicalsoup-0.10.0/PKG-INFO --- python-mechanicalsoup-0.8.0/PKG-INFO 2017-10-04 18:16:53.000000000 +0000 +++ python-mechanicalsoup-0.10.0/PKG-INFO 2018-02-04 00:53:20.000000000 +0000 @@ -1,12 +1,139 @@ Metadata-Version: 1.1 Name: MechanicalSoup -Version: 0.8.0 +Version: 0.10.0 Summary: A Python library for automating interaction with websites -Home-page: https://github.com/hickford/MechanicalSoup +Home-page: https://mechanicalsoup.readthedocs.io/ Author: UNKNOWN Author-email: UNKNOWN License: MIT -Description: UNKNOWN +Description: MechanicalSoup + ============== + + Home page + --------- + + https://mechanicalsoup.readthedocs.io/ + + Overview + -------- + + A Python library for automating interaction with websites. + MechanicalSoup automatically stores and sends cookies, follows + redirects, and can follow links and submit forms. It doesn't do + JavaScript. + + MechanicalSoup was created by `M Hickford + `__, who was a fond user of the + `Mechanize `__ library. + Unfortunately, Mechanize is `incompatible with Python 3 + `__ and its development + stalled for several years. MechanicalSoup provides a similar API, built on Python + giants `Requests `__ (for + HTTP sessions) and `BeautifulSoup + `__ (for document + navigation). Since 2017 it is a project actively maintained by a small + team including `@hemberger `__ and `@moy + `__. + + |Gitter Chat| + + Installation + ------------ + + |Latest Version| |Supported Versions| + + PyPy and PyPy3 are also supported (and tested against). + + Download and install the latest released version from `PyPI `__:: + + pip install MechanicalSoup + + Download and install the development version from `GitHub `__:: + + pip install git+https://github.com/MechanicalSoup/MechanicalSoup + + Installing from source (installs the version in the current working directory):: + + python setup.py install + + (In all cases, add ``--user`` to the ``install`` command to + install in the current user's home directory.) + + + Documentation + ------------- + + The full documentation is available on + https://mechanicalsoup.readthedocs.io/. You may want to jump directly to + the `automatically generated API + documentation `__. + + Example + ------- + + From `examples/expl_duck_duck_go.py `__, code to get the results from + a DuckDuckGo search: + + .. code:: python + + """Example usage of MechanicalSoup to get the results from + DuckDuckGo.""" + + import mechanicalsoup + + # Connect to duckduckgo + browser = mechanicalsoup.StatefulBrowser() + browser.open("https://duckduckgo.com/") + + # Fill-in the search form + browser.select_form('#search_form_homepage') + browser["q"] = "MechanicalSoup" + browser.submit_selected() + + # Display the results + for link in browser.get_current_page().select('a.result__a'): + print(link.text, '->', link.attrs['href']) + + More examples are available in `examples/ `__. + + For an example with a more complex form (checkboxes, radio buttons and + textareas), read `tests/test_browser.py `__ + and `tests/test_form.py `__. + + Development + ----------- + + |Build Status| |Coverage Status| + |Requirements Status| |Documentation Status| + |CII Best Practices| + + Instructions for building, testing and contributing to MechanicalSoup: + see `CONTRIBUTING.rst `__. + + Common problems + --------------- + + Read the `FAQ + `__. + + + .. |Latest Version| image:: https://img.shields.io/pypi/v/MechanicalSoup.svg + :target: https://pypi.python.org/pypi/MechanicalSoup/ + .. |Supported Versions| image:: https://img.shields.io/pypi/pyversions/mechanicalsoup.svg + :target: https://pypi.python.org/pypi/MechanicalSoup/ + .. |Build Status| image:: https://travis-ci.org/MechanicalSoup/MechanicalSoup.svg?branch=master + :target: https://travis-ci.org/MechanicalSoup/MechanicalSoup + .. |Coverage Status| image:: https://codecov.io/gh/MechanicalSoup/MechanicalSoup/branch/master/graph/badge.svg + :target: https://codecov.io/gh/MechanicalSoup/MechanicalSoup + .. |Requirements Status| image:: https://requires.io/github/MechanicalSoup/MechanicalSoup/requirements.svg?branch=master + :target: https://requires.io/github/MechanicalSoup/MechanicalSoup/requirements/?branch=master + .. |Documentation Status| image:: https://readthedocs.org/projects/mechanicalsoup/badge/?version=latest + :target: https://mechanicalsoup.readthedocs.io/en/latest/?badge=latest + .. |CII Best Practices| image:: https://bestpractices.coreinfrastructure.org/projects/1334/badge + :target: https://bestpractices.coreinfrastructure.org/projects/1334) + .. |Gitter Chat| image:: https://badges.gitter.im/MechanicalSoup/MechanicalSoup.svg + :target: https://gitter.im/MechanicalSoup/Lobby + Platform: UNKNOWN Classifier: License :: OSI Approved :: MIT License Classifier: Programming Language :: Python :: 2 diff -Nru python-mechanicalsoup-0.8.0/README.md python-mechanicalsoup-0.10.0/README.md --- python-mechanicalsoup-0.8.0/README.md 2017-10-01 15:24:04.000000000 +0000 +++ python-mechanicalsoup-0.10.0/README.md 1970-01-01 00:00:00.000000000 +0000 @@ -1,129 +0,0 @@ -MechanicalSoup -============== - -A Python library for automating interaction with websites. MechanicalSoup automatically stores and sends cookies, follows redirects, and can follow links and submit forms. It doesn't do Javascript. - -I was a fond user of the [Mechanize](https://github.com/jjlee/mechanize) library, but unfortunately it's [incompatible with Python 3](https://github.com/jjlee/mechanize/issues/96) and development is inactive. MechanicalSoup provides a similar API, built on Python giants [Requests](http://docs.python-requests.org/en/latest/) (for http sessions) and [BeautifulSoup](http://www.crummy.com/software/BeautifulSoup/) (for document navigation). - -Installation ------- - -[![Latest Version](https://img.shields.io/pypi/v/MechanicalSoup.svg)](https://pypi.python.org/pypi/MechanicalSoup/) - -From [PyPI](https://pypi.python.org/pypi/MechanicalSoup/) - - pip install MechanicalSoup - -Python versions 2.7, 3.4-3.6, PyPy and PyPy3 are supported (and tested against). - -Example ------- - -From [`example.py`](example.py), code to log into the GitHub website: - -```python -"""Example app to login to GitHub using the StatefulBrowser class.""" - -from __future__ import print_function -import argparse -import mechanicalsoup -from getpass import getpass - -parser = argparse.ArgumentParser(description="Login to GitHub.") -parser.add_argument("username") -args = parser.parse_args() - -args.password = getpass("Please enter your GitHub password: ") - -browser = mechanicalsoup.StatefulBrowser( - soup_config={'features': 'lxml'}, - raise_on_404=True, - user_agent='MyBot/0.1: mysite.example.com/bot_info', -) -# Uncomment for a more verbose output: -# browser.set_verbose(2) - -browser.open("https://github.com") -browser.follow_link("login") -browser.select_form('#login form') -browser["login"] = args.username -browser["password"] = args.password -resp = browser.submit_selected() - -# Uncomment to launch a web browser on the current page: -# browser.launch_browser() - -# verify we are now logged in -page = browser.get_current_page() -messages = page.find("div", class_="flash-messages") -if messages: - print(messages.text) -assert page.select(".logout-form") - -print(page.title.text) - -# verify we remain logged in (thanks to cookies) as we browse the rest of -# the site -page3 = browser.open("https://github.com/hickford/MechanicalSoup") -assert page3.soup.select(".logout-form") -``` - -For an example with a more complex form (checkboxes, radio buttons and textareas), read [`tests/test_browser.py`](tests/test_browser.py) and [`tests/test_form.py`](tests/test_form.py). - -Common problems ---- - -### "No parser was explicitly specified" - -> UserWarning: No parser was explicitly specified, so I'm using the best available HTML parser for this system ("lxml"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently. - -Recent versions of BeautifulSoup show a harmless warning to encourage you to specify which HTML parser to use. You can do this in MechanicalSoup: - - mechanicalsoup.Browser(soup_config={'features':'html.parser'}) - -Or if you have the parser [lxml](http://lxml.de/installation.html) installed: - - mechanicalsoup.Browser(soup_config={'features':'lxml'}) - -See also https://www.crummy.com/software/BeautifulSoup/bs4/doc/#you-need-a-parser - -Development ---------- - -[![Build Status](https://travis-ci.org/hickford/MechanicalSoup.svg?branch=master)](https://travis-ci.org/hickford/MechanicalSoup) -[![Coverage Status](https://codecov.io/gh/hickford/MechanicalSoup/branch/master/graph/badge.svg)](https://codecov.io/gh/hickford/MechanicalSoup) -[![Requirements Status](https://requires.io/github/hickford/MechanicalSoup/requirements.svg?branch=master)](https://requires.io/github/hickford/MechanicalSoup/requirements/?branch=master) - -You can develop against multiple versions of Python using [virtualenv](https://packaging.python.org/tutorials/installing-packages/#creating-virtual-environments): - - python3 -m venv .virtual-py3 && source .virtual-py3/bin/activate - pip install pytest pytest-cov flake8 requests_mock -and - - virtualenv -p python2 --no-site-packages .virtual-py2 && source .virtual-py2/bin/activate - pip install pytest pytest-cov flake8 requests_mock - -After making changes, check syntax: - - flake8 $(git ls-files mechanicalsoup/'*.py') example.py - -Then run py.test in all virtualenvs: - - source .virtual-py3/bin/activate - python setup.py install && pytest - - source .virtual-py2/bin/activate - python setup.py install && pytest - - -### Roadmap - -* Draw [Substack-style](http://substack.net/art) readme art (imagine a steaming bowl of cogs and noodles) -* [Write docs and publish website](https://github.com/hickford/MechanicalSoup/issues/6) - -See also ------- - -* [RoboBrowser](https://github.com/jmcarp/robobrowser): a similar library, also based on Requests and BeautifulSoup. -* [Hacker News post](https://news.ycombinator.com/item?id=8012103) -* [Reddit discussion](http://www.reddit.com/r/programming/comments/2aa13s/mechanicalsoup_a_python_library_for_automating/) diff -Nru python-mechanicalsoup-0.8.0/README.rst python-mechanicalsoup-0.10.0/README.rst --- python-mechanicalsoup-0.8.0/README.rst 1970-01-01 00:00:00.000000000 +0000 +++ python-mechanicalsoup-0.10.0/README.rst 2018-02-04 00:51:13.000000000 +0000 @@ -0,0 +1,127 @@ +MechanicalSoup +============== + +Home page +--------- + +https://mechanicalsoup.readthedocs.io/ + +Overview +-------- + +A Python library for automating interaction with websites. +MechanicalSoup automatically stores and sends cookies, follows +redirects, and can follow links and submit forms. It doesn't do +JavaScript. + +MechanicalSoup was created by `M Hickford +`__, who was a fond user of the +`Mechanize `__ library. +Unfortunately, Mechanize is `incompatible with Python 3 +`__ and its development +stalled for several years. MechanicalSoup provides a similar API, built on Python +giants `Requests `__ (for +HTTP sessions) and `BeautifulSoup +`__ (for document +navigation). Since 2017 it is a project actively maintained by a small +team including `@hemberger `__ and `@moy +`__. + +|Gitter Chat| + +Installation +------------ + +|Latest Version| |Supported Versions| + +PyPy and PyPy3 are also supported (and tested against). + +Download and install the latest released version from `PyPI `__:: + + pip install MechanicalSoup + +Download and install the development version from `GitHub `__:: + + pip install git+https://github.com/MechanicalSoup/MechanicalSoup + +Installing from source (installs the version in the current working directory):: + + python setup.py install + +(In all cases, add ``--user`` to the ``install`` command to +install in the current user's home directory.) + + +Documentation +------------- + +The full documentation is available on +https://mechanicalsoup.readthedocs.io/. You may want to jump directly to +the `automatically generated API +documentation `__. + +Example +------- + +From ``__, code to get the results from +a DuckDuckGo search: + +.. code:: python + + """Example usage of MechanicalSoup to get the results from + DuckDuckGo.""" + + import mechanicalsoup + + # Connect to duckduckgo + browser = mechanicalsoup.StatefulBrowser() + browser.open("https://duckduckgo.com/") + + # Fill-in the search form + browser.select_form('#search_form_homepage') + browser["q"] = "MechanicalSoup" + browser.submit_selected() + + # Display the results + for link in browser.get_current_page().select('a.result__a'): + print(link.text, '->', link.attrs['href']) + +More examples are available in ``__. + +For an example with a more complex form (checkboxes, radio buttons and +textareas), read ``__ +and ``__. + +Development +----------- + +|Build Status| |Coverage Status| +|Requirements Status| |Documentation Status| +|CII Best Practices| + +Instructions for building, testing and contributing to MechanicalSoup: +see ``__. + +Common problems +--------------- + +Read the `FAQ +`__. + + +.. |Latest Version| image:: https://img.shields.io/pypi/v/MechanicalSoup.svg + :target: https://pypi.python.org/pypi/MechanicalSoup/ +.. |Supported Versions| image:: https://img.shields.io/pypi/pyversions/mechanicalsoup.svg + :target: https://pypi.python.org/pypi/MechanicalSoup/ +.. |Build Status| image:: https://travis-ci.org/MechanicalSoup/MechanicalSoup.svg?branch=master + :target: https://travis-ci.org/MechanicalSoup/MechanicalSoup +.. |Coverage Status| image:: https://codecov.io/gh/MechanicalSoup/MechanicalSoup/branch/master/graph/badge.svg + :target: https://codecov.io/gh/MechanicalSoup/MechanicalSoup +.. |Requirements Status| image:: https://requires.io/github/MechanicalSoup/MechanicalSoup/requirements.svg?branch=master + :target: https://requires.io/github/MechanicalSoup/MechanicalSoup/requirements/?branch=master +.. |Documentation Status| image:: https://readthedocs.org/projects/mechanicalsoup/badge/?version=latest + :target: https://mechanicalsoup.readthedocs.io/en/latest/?badge=latest +.. |CII Best Practices| image:: https://bestpractices.coreinfrastructure.org/projects/1334/badge + :target: https://bestpractices.coreinfrastructure.org/projects/1334) +.. |Gitter Chat| image:: https://badges.gitter.im/MechanicalSoup/MechanicalSoup.svg + :target: https://gitter.im/MechanicalSoup/Lobby diff -Nru python-mechanicalsoup-0.8.0/requirements.txt python-mechanicalsoup-0.10.0/requirements.txt --- python-mechanicalsoup-0.8.0/requirements.txt 1970-01-01 00:00:00.000000000 +0000 +++ python-mechanicalsoup-0.10.0/requirements.txt 2018-02-04 00:51:13.000000000 +0000 @@ -0,0 +1,4 @@ +requests >= 2.0 +beautifulsoup4 +six >= 1.4 +lxml diff -Nru python-mechanicalsoup-0.8.0/setup.cfg python-mechanicalsoup-0.10.0/setup.cfg --- python-mechanicalsoup-0.8.0/setup.cfg 2017-10-04 18:16:53.000000000 +0000 +++ python-mechanicalsoup-0.10.0/setup.cfg 2018-02-04 00:53:20.000000000 +0000 @@ -5,10 +5,19 @@ universal = 1 [tool:pytest] -addopts = --cov --cov-config .coveragerc +addopts = --cov --cov-config .coveragerc --flake8 -v +flake8-ignore = + docs/*.py ALL +python_files = tests/*.py + +[build_sphinx] +source-dir = docs/ +build-dir = docs/_build +all-files = 1 +fresh-env = 1 [egg_info] tag_build = -tag_svn_revision = 0 tag_date = 0 +tag_svn_revision = 0 diff -Nru python-mechanicalsoup-0.8.0/setup.py python-mechanicalsoup-0.10.0/setup.py --- python-mechanicalsoup-0.8.0/setup.py 2017-10-01 15:24:04.000000000 +0000 +++ python-mechanicalsoup-0.10.0/setup.py 2018-02-04 00:51:13.000000000 +0000 @@ -1,11 +1,33 @@ -from setuptools import setup, find_packages # Always prefer setuptools over distutils +from setuptools import setup # Always prefer setuptools over distutils from codecs import open # To use a consistent encoding from os import path +import re + + +def requirements_from_file(filename): + """Parses a pip requirements file into a list.""" + return [line.strip() for line in open(filename, 'r') + if line.strip() and not line.strip().startswith('--')] + + +def read(fname, URL): + """Read the content of a file.""" + readme = open(path.join(path.dirname(__file__), fname)).read() + if hasattr(readme, 'decode'): + # In Python 3, turn bytes into str. + readme = readme.decode('utf8') + # turn relative links into absolute ones + readme = re.sub(r'`<([^>]*)>`__', + r'`\1 <' + URL + r"/blob/master/\1>`__", + readme) + return readme + here = path.abspath(path.dirname(__file__)) about = {} -with open(path.join(here, 'mechanicalsoup', '__version__.py'), 'r', 'utf-8') as f: +with open(path.join(here, 'mechanicalsoup', '__version__.py'), + 'r', 'utf-8') as f: exec(f.read(), about) setup( @@ -15,7 +37,7 @@ version=about['__version__'], description=about['__description__'], - + long_description=read('README.rst', about['__github_url__']), url=about['__url__'], license=about['__license__'], @@ -35,21 +57,11 @@ packages=['mechanicalsoup'], - # List run-time dependencies here. These will be installed by pip when your - # project is installed. For an analysis of "install_requires" vs pip's - # requirements files see: + # List run-time dependencies here. These will be installed by pip + # when your project is installed. For an analysis of + # "install_requires" vs pip's requirements files see: # https://packaging.python.org/en/latest/requirements.html - install_requires=[ - 'requests >= 2.0', - 'beautifulsoup4', - 'six >= 1.4' - ], - setup_requires=[ - 'pytest-runner', - ], - tests_require=[ - 'pytest', - 'pytest-cov', - 'requests_mock' - ] + install_requires=requirements_from_file('requirements.txt'), + setup_requires=['pytest-runner'], + tests_require=requirements_from_file('tests/requirements.txt'), ) diff -Nru python-mechanicalsoup-0.8.0/tests/conftest.py python-mechanicalsoup-0.10.0/tests/conftest.py --- python-mechanicalsoup-0.8.0/tests/conftest.py 1970-01-01 00:00:00.000000000 +0000 +++ python-mechanicalsoup-0.10.0/tests/conftest.py 2018-02-04 00:51:13.000000000 +0000 @@ -0,0 +1,10 @@ +import pytest + +# This file is automatically discovered by pytest to define +# shared fixtures only once. + + +@pytest.fixture +def httpbin(): + from utils import HttpbinRemote + return HttpbinRemote() diff -Nru python-mechanicalsoup-0.8.0/tests/requirements.txt python-mechanicalsoup-0.10.0/tests/requirements.txt --- python-mechanicalsoup-0.8.0/tests/requirements.txt 1970-01-01 00:00:00.000000000 +0000 +++ python-mechanicalsoup-0.10.0/tests/requirements.txt 2018-02-04 00:51:13.000000000 +0000 @@ -0,0 +1,5 @@ +pytest +pytest-cov +pytest-flake8 +pytest-mock +requests_mock diff -Nru python-mechanicalsoup-0.8.0/tests/setpath.py python-mechanicalsoup-0.10.0/tests/setpath.py --- python-mechanicalsoup-0.8.0/tests/setpath.py 1970-01-01 00:00:00.000000000 +0000 +++ python-mechanicalsoup-0.10.0/tests/setpath.py 2018-02-04 00:51:13.000000000 +0000 @@ -0,0 +1,10 @@ +"""Add the main directory of the project to sys.path, so that +uninstalled version is tested.""" + +import sys +import os + +TEST_DIR = os.path.abspath(os.path.dirname(__file__)) +PROJ_DIR = os.path.dirname(TEST_DIR) + +sys.path.insert(0, os.path.join(PROJ_DIR)) diff -Nru python-mechanicalsoup-0.8.0/tests/test_browser.py python-mechanicalsoup-0.10.0/tests/test_browser.py --- python-mechanicalsoup-0.8.0/tests/test_browser.py 2017-10-01 12:16:30.000000000 +0000 +++ python-mechanicalsoup-0.10.0/tests/test_browser.py 2018-02-04 00:51:13.000000000 +0000 @@ -1,3 +1,4 @@ +import setpath # noqa:F401, must come before 'import mechanicalsoup' import mechanicalsoup import sys from bs4 import BeautifulSoup @@ -5,10 +6,11 @@ from requests.cookies import RequestsCookieJar import pytest -def test_submit_online(): + +def test_submit_online(httpbin): """Complete and submit the pizza form at http://httpbin.org/forms/post """ browser = mechanicalsoup.Browser() - page = browser.get("http://httpbin.org/forms/post") + page = browser.get(httpbin + "/forms/post") form = page.soup.form form.find("input", {"name": "custname"})["value"] = "Philip J. Fry" @@ -34,23 +36,24 @@ assert json["headers"]["User-Agent"].startswith('python-requests/') assert 'MechanicalSoup' in json["headers"]["User-Agent"] + form_html = """
- +
Pizza Size -

-

-

+

Small

+

Medium

+

Large

Pizza Toppings -

-

-

-

+

Bacon

+

Extra Cheese

+

Onion

+

Mushroom

-
- -
-
- - - -
-
- - -''' - -def setup_mock_browser(expected_post=None): - url = 'mock://multi-button-form.com' - mock = requests_mock.Adapter() - mock.register_uri('GET', url, headers={'Content-Type': 'text/html'}, text=choose_submit_form) - if expected_post: - def text_callback(request, context): - query = parse_qsl(request.text) - assert(set(query) == set(expected_post)) - return 'Success!' - mock.register_uri('POST', url + '/post', text=text_callback) - return mechanicalsoup.StatefulBrowser(requests_adapters={'mock': mock}), url - @pytest.mark.parametrize("expected_post", [ pytest.param( [ @@ -139,6 +101,7 @@ ''' + @pytest.mark.parametrize("select_name", [ pytest.param({'name': 'does_not_exist', 'fails': True}, id='not found'), pytest.param({'name': 'test_submit', 'fails': False}, id='found'), @@ -163,6 +126,7 @@ ''' + def test_choose_submit_multiple_match(): browser = mechanicalsoup.StatefulBrowser() browser.open_fake_page(choose_submit_multiple_match_form) @@ -183,18 +147,20 @@ ''' + def test_form_noaction(): browser, url = setup_mock_browser() browser.open_fake_page(submit_form_noaction, url=url) form = browser.select_form('#choose-submit-form') - browser['text1'] = 'newText1' + form['text1'] = 'newText1' res = browser.submit_selected() assert(res.status_code == 200 and browser.get_url() == url) + submit_form_action = ''' -
+ @@ -203,13 +169,202 @@ ''' + def test_form_action(): browser, url = setup_mock_browser() - browser.open_fake_page(submit_form_action, url="http://example.com/invalid/") + # for info about example.com see: https://tools.ietf.org/html/rfc2606 + browser.open_fake_page(submit_form_action, + url="http://example.com/invalid/") form = browser.select_form('#choose-submit-form') - browser['text1'] = 'newText1' + form['text1'] = 'newText1' res = browser.submit_selected() assert(res.status_code == 200 and browser.get_url() == url) + +set_select_form = ''' + + + + +
+ +''' + + +@pytest.mark.parametrize("option", [ + pytest.param({'result': [('entree', 'tofu')], 'default': True}, + id='default'), + pytest.param({'result': [('entree', 'curry')], 'default': False}, + id='selected'), +]) +def test_set_select(option): + '''Test the branch of Form.set that finds "select" elements.''' + browser, url = setup_mock_browser(expected_post=option['result'], + text=set_select_form) + browser.open(url) + browser.select_form('form') + if not option['default']: + browser[option['result'][0][0]] = option['result'][0][1] + res = browser.submit_selected() + assert(res.status_code == 200 and res.text == 'Success!') + + +set_select_multiple_form = ''' +
+ + +
+''' + + +@pytest.mark.parametrize("options", [ + pytest.param('bass', id='select one (str)'), + pytest.param(('bass',), id='select one (tuple)'), + pytest.param(('piano', 'violin'), id='select two'), +]) +def test_set_select_multiple(options): + """Test a This is a checkbox + + +''' + + +def test_form_check_uncheck(): + browser = mechanicalsoup.StatefulBrowser() + browser.open_fake_page(page_with_radio, url="http://example.com/invalid/") + form = browser.select_form('form') + assert "checked" not in form.form.find("input", {"name": "foo"}).attrs + + form["foo"] = True + assert form.form.find("input", {"name": "foo"}).attrs["checked"] == "" + + # Test explicit unchecking (skipping the call to Form.uncheck_all) + form.set_checkbox({"foo": False}, uncheck_other_boxes=False) + assert "checked" not in form.form.find("input", {"name": "foo"}).attrs + + +page_with_various_fields = ''' + +
+ + + +
+ Pizza Toppings +

+

+

+

+
+

Small

+

Medium

+

Large

+ +
+ +''' + + +def test_form_print_summary(capsys): + browser = mechanicalsoup.StatefulBrowser() + browser.open_fake_page(page_with_various_fields, + url="http://example.com/invalid/") + browser.select_form("form") + browser.get_current_form().print_summary() + out, err = capsys.readouterr() + # Different versions of bs4 show either or + # . Normalize before comparing. + out = out.replace('>', '/>') + assert out == """ + + + + + + + + + + +""" + assert err == "" + + +def test_issue180(): + """Test that a KeyError is not raised when Form.choose_submit is called + on a form where a submit element is missing its name-attribute.""" + browser = mechanicalsoup.StatefulBrowser() + html = ''' +
+ + +
+''' + browser.open_fake_page(html) + form = browser.select_form() + with pytest.raises(mechanicalsoup.utils.LinkNotFoundError): + form.choose_submit('not_found') + + if __name__ == '__main__': pytest.main(sys.argv) diff -Nru python-mechanicalsoup-0.8.0/tests/test_stateful_browser.py python-mechanicalsoup-0.10.0/tests/test_stateful_browser.py --- python-mechanicalsoup-0.8.0/tests/test_stateful_browser.py 2017-10-01 12:16:30.000000000 +0000 +++ python-mechanicalsoup-0.10.0/tests/test_stateful_browser.py 2018-02-04 00:51:13.000000000 +0000 @@ -1,25 +1,41 @@ +import os +import tempfile +import json +import setpath # noqa:F401, must come before 'import mechanicalsoup' import mechanicalsoup import sys import re from bs4 import BeautifulSoup -from test_form import setup_mock_browser +from utils import setup_mock_browser, prepare_mock_browser, mock_get import pytest +import webbrowser -def test_submit_online(): + +def test_request_forward(): + browser, url = setup_mock_browser(expected_post=[('var1', 'val1'), + ('var2', 'val2')]) + r = browser.request('POST', url + '/post', data={'var1': 'val1', + 'var2': 'val2'}) + assert r.text == 'Success!' + + +def test_submit_online(httpbin): """Complete and submit the pizza form at http://httpbin.org/forms/post """ browser = mechanicalsoup.StatefulBrowser() - browser.set_user_agent('testing https://github.com/hickford/MechanicalSoup') - browser.open("http://httpbin.org/") + browser.set_user_agent('testing MechanicalSoup') + browser.open(httpbin.url) for link in browser.links(): if link["href"] == "/": browser.follow_link(link) break browser.follow_link("forms/post") - assert browser.get_url() == "http://httpbin.org/forms/post" + assert browser.get_url() == httpbin + "/forms/post" browser.select_form("form") browser["custname"] = "Customer Name Here" browser["size"] = "medium" - browser["topping"] = ("cheese") + browser["topping"] = ("cheese", "bacon") + # Change our mind to make sure old boxes are unticked + browser["topping"] = ("cheese", "onion") browser["comments"] = "Some comment here" browser.get_current_form().set("nosuchfield", "new value", True) response = browser.submit_selected() @@ -28,54 +44,58 @@ assert data["custname"] == "Customer Name Here" assert data["custtel"] == "" # web browser submits "" for input left blank assert data["size"] == "medium" - assert data["topping"] == "cheese" + assert set(data["topping"]) == set(("cheese", "onion")) assert data["comments"] == "Some comment here" assert data["nosuchfield"] == "new value" - assert (json["headers"]["User-Agent"] == - 'testing https://github.com/hickford/MechanicalSoup') + assert json["headers"]["User-Agent"] == 'testing MechanicalSoup' # Ensure we haven't blown away any regular headers - assert set(('Content-Length', 'Host', 'Content-Type', 'Connection', 'Accept', - 'User-Agent', 'Accept-Encoding')).issubset(json["headers"].keys()) + expected_headers = ('Content-Length', 'Host', 'Content-Type', 'Connection', + 'Accept', 'User-Agent', 'Accept-Encoding') + assert set(expected_headers).issubset(json["headers"].keys()) -def test_no_404(): +def test_no_404(httpbin): browser = mechanicalsoup.StatefulBrowser() - resp = browser.open("http://httpbin.org/nosuchpage") + resp = browser.open(httpbin + "/nosuchpage") assert resp.status_code == 404 -def test_404(): + +def test_404(httpbin): browser = mechanicalsoup.StatefulBrowser(raise_on_404=True) - with pytest.raises(mechanicalsoup.LinkNotFoundError) as context: - resp = browser.open("http://httpbin.org/nosuchpage") - resp = browser.open("http://httpbin.org/") + with pytest.raises(mechanicalsoup.LinkNotFoundError): + resp = browser.open(httpbin + "/nosuchpage") + resp = browser.open(httpbin.url) assert resp.status_code == 200 -def test_user_agent(): + +def test_user_agent(httpbin): browser = mechanicalsoup.StatefulBrowser(user_agent='007') - resp = browser.open("http://httpbin.org/user-agent") + resp = browser.open(httpbin + "/user-agent") assert resp.json() == {'user-agent': '007'} -def test_open_relative(): + +def test_open_relative(httpbin): # Open an arbitrary httpbin page to set the current URL browser = mechanicalsoup.StatefulBrowser() - browser.open("http://httpbin.org/html") + browser.open(httpbin + "/html") # Open a relative page and make sure remote host and browser agree on URL resp = browser.open_relative("/get") - assert resp.json()['url'] == "http://httpbin.org/get" - assert browser.get_url() == "http://httpbin.org/get" + assert resp.json()['url'] == httpbin + "/get" + assert browser.get_url() == httpbin + "/get" # Test passing additional kwargs to the session resp = browser.open_relative("/basic-auth/me/123", auth=('me', '123')) - assert browser.get_url() == "http://httpbin.org/basic-auth/me/123" + assert browser.get_url() == httpbin + "/basic-auth/me/123" assert resp.json() == {"authenticated": True, "user": "me"} + def test_links(): browser = mechanicalsoup.StatefulBrowser() html = '''A Blue Link A Red Link''' - expected = [BeautifulSoup(html).a] + expected = [BeautifulSoup(html, "lxml").a] browser.open_fake_page(html) # Test StatefulBrowser.links url_regex argument @@ -95,7 +115,8 @@ # Test returning a non-singleton two_links = browser.links(id=re.compile('_link')) assert len(two_links) == 2 - assert two_links == BeautifulSoup(html).find_all('a') + assert two_links == BeautifulSoup(html, "lxml").find_all('a') + @pytest.mark.parametrize("expected_post", [ pytest.param( @@ -115,12 +136,13 @@ '''Tests that the btnName argument chooses the submit button.''' browser, url = setup_mock_browser(expected_post=expected_post) browser.open(url) - form = browser.select_form('#choose-submit-form') + browser.select_form('#choose-submit-form') browser['text'] = expected_post[2][1] browser['comment'] = expected_post[0][1] - res = browser.submit_selected(btnName = expected_post[1][0]) + res = browser.submit_selected(btnName=expected_post[1][0]) assert(res.status_code == 200 and res.text == 'Success!') + def test_get_set_debug(): browser = mechanicalsoup.StatefulBrowser() # Debug mode is off by default @@ -128,6 +150,7 @@ browser.set_debug(True) assert(browser.get_debug()) + def test_list_links(capsys): # capsys is a pytest fixture that allows us to inspect the std{err,out} browser = mechanicalsoup.StatefulBrowser() @@ -141,5 +164,457 @@ expected = 'Links in the current page:{0}'.format(links) assert out == expected + +def test_launch_browser(mocker): + browser = mechanicalsoup.StatefulBrowser() + browser.set_debug(True) + browser.open_fake_page('') + mocker.patch('webbrowser.open') + with pytest.raises(mechanicalsoup.LinkNotFoundError): + browser.follow_link('nosuchlink') + # mock.assert_called_once() not available on some versions :-( + assert webbrowser.open.call_count == 1 + mocker.resetall() + with pytest.raises(mechanicalsoup.LinkNotFoundError): + browser.select_form('nosuchlink') + # mock.assert_called_once() not available on some versions :-( + assert webbrowser.open.call_count == 1 + + +def test_find_link(): + browser = mechanicalsoup.StatefulBrowser() + browser.open_fake_page('') + with pytest.raises(mechanicalsoup.LinkNotFoundError): + browser.find_link('nosuchlink') + + +def test_verbose(capsys): + '''Tests that the btnName argument chooses the submit button.''' + browser, url = setup_mock_browser() + browser.open(url) + out, err = capsys.readouterr() + assert out == "" + assert err == "" + assert browser.get_verbose() == 0 + browser.set_verbose(1) + browser.open(url) + out, err = capsys.readouterr() + assert out == "." + assert err == "" + assert browser.get_verbose() == 1 + browser.set_verbose(2) + browser.open(url) + out, err = capsys.readouterr() + assert out == "mock://form.com\n" + assert err == "" + assert browser.get_verbose() == 2 + + +def test_new_control(): + browser = mechanicalsoup.StatefulBrowser() + browser.open("http://httpbin.org/forms/post") + browser.select_form("form") + with pytest.raises(mechanicalsoup.LinkNotFoundError): + # The control doesn't exist, yet. + browser["temperature"] = "cold" + browser["size"] = "large" # Existing radio + browser["comments"] = "This is a comment" # Existing textarea + browser.new_control("text", "temperature", "warm") + browser.new_control("textarea", "size", "Sooo big !") + browser.new_control("text", "comments", "This is an override comment") + browser.new_control("checkbox", "foo", "valval", checked="checked") + tag = browser.get_current_form().form.find("input", {"name": "foo"}) + assert tag.attrs["checked"] == "checked" + browser["temperature"] = "hot" + response = browser.submit_selected() + json = response.json() + data = json["form"] + print(data) + assert data["temperature"] == "hot" + assert data["size"] == "Sooo big !" + assert data["comments"] == "This is an override comment" + assert data["foo"] == "valval" + + +submit_form_noaction = ''' + + +
+ + + +
+ + +''' + + +def test_form_noaction(): + browser, url = setup_mock_browser() + browser.open_fake_page(submit_form_noaction) + browser.select_form('#choose-submit-form') + with pytest.raises(ValueError, message="no URL to submit to"): + browser.submit_selected() + + +submit_form_noname = ''' + + +
+ + +
+ + +''' + + +def test_form_noname(): + browser, url = setup_mock_browser(expected_post=[]) + browser.open_fake_page(submit_form_noname, url=url) + browser.select_form('#choose-submit-form') + response = browser.submit_selected() + assert(response.status_code == 200 and response.text == 'Success!') + + +submit_form_multiple = ''' + + +
+ +
+ + +''' + + +def test_form_multiple(): + browser, url = setup_mock_browser(expected_post=[('foo', 'tempeh'), + ('foo', 'tofu')]) + browser.open_fake_page(submit_form_multiple, url=url) + browser.select_form('#choose-submit-form') + response = browser.submit_selected() + assert(response.status_code == 200 and response.text == 'Success!') + + +def test_upload_file(httpbin): + browser = mechanicalsoup.StatefulBrowser() + browser.open(httpbin + "/forms/post") + + # Create two temporary files to upload + def make_file(content): + path = tempfile.mkstemp()[1] + with open(path, "w") as f: + f.write(content) + return path + path1, path2 = (make_file(content) for content in + ("first file content", "second file content")) + + # The form doesn't have a type=file field, but the target action + # does show it => add the fields ourselves. + browser.select_form() + browser.new_control("file", "first", path1) + browser.new_control("file", "second", "") + browser["second"] = path2 + browser.get_current_form().print_summary() + response = browser.submit_selected() + files = response.json()["files"] + assert files["first"] == "first file content" + assert files["second"] == "second file content" + + +def test_with(): + """Test that __enter__/__exit__ properly create/close the browser.""" + with mechanicalsoup.StatefulBrowser() as browser: + assert browser.session is not None + assert browser.session is None + + +def test_select_form_nr(): + """Test the nr option of select_form.""" + forms = """
""" + with mechanicalsoup.StatefulBrowser() as browser: + browser.open_fake_page(forms) + form = browser.select_form() + assert form.form['id'] == "a" + form = browser.select_form(nr=1) + assert form.form['id'] == "b" + form = browser.select_form(nr=2) + assert form.form['id'] == "c" + with pytest.raises(mechanicalsoup.LinkNotFoundError): + browser.select_form(nr=3) + + +def test_select_form_tag_object(): + """Test tag object as selector parameter type""" + forms = """

""" + soup = BeautifulSoup(forms, "lxml") + with mechanicalsoup.StatefulBrowser() as browser: + browser.open_fake_page(forms) + form = browser.select_form(soup.find("form", {"id": "b"})) + assert form.form['id'] == "b" + with pytest.raises(mechanicalsoup.LinkNotFoundError): + browser.select_form(soup.find("p")) + + +def test_referer_follow_link(httpbin): + browser = mechanicalsoup.StatefulBrowser() + browser.open(httpbin.url) + response = browser.follow_link("/headers") + referer = response.json()["headers"]["Referer"] + actual_ref = re.sub('/*$', '', referer) + expected_ref = re.sub('/*$', '', httpbin.url) + assert actual_ref == expected_ref + + +submit_form_headers = ''' + + +
+ + + +
+ + +''' + + +def test_referer_submit(httpbin): + browser = mechanicalsoup.StatefulBrowser() + ref = "https://example.com/my-referer" + page = submit_form_headers.format(httpbin.url + "/headers") + browser.open_fake_page(page, url=ref) + browser.select_form() + response = browser.submit_selected() + headers = response.json()["headers"] + referer = headers["Referer"] + actual_ref = re.sub('/*$', '', referer) + assert actual_ref == ref + + +def test_referer_submit_headers(httpbin): + browser = mechanicalsoup.StatefulBrowser() + ref = "https://example.com/my-referer" + page = submit_form_headers.format(httpbin.url + "/headers") + browser.open_fake_page(page, url=ref) + browser.select_form() + response = browser.submit_selected( + headers={'X-Test-Header': 'x-test-value'}) + headers = response.json()["headers"] + referer = headers["Referer"] + actual_ref = re.sub('/*$', '', referer) + assert actual_ref == ref + assert headers['X-Test-Header'] == 'x-test-value' + + +def test_link_arg_text(httpbin): + browser = mechanicalsoup.StatefulBrowser() + browser.open_fake_page('Link', httpbin.url) + browser.follow_link(link_text='Link') + assert browser.get_url() == httpbin + '/get' + + +def test_link_arg_regex(httpbin): + browser = mechanicalsoup.StatefulBrowser() + browser.open_fake_page('Link', httpbin.url) + browser.follow_link(url_regex='.*') + assert browser.get_url() == httpbin + '/get' + + +def test_link_arg_multiregex(httpbin): + browser = mechanicalsoup.StatefulBrowser() + browser.open_fake_page('Link', httpbin.url) + with pytest.raises(ValueError, match="link parameter cannot be .*"): + browser.follow_link('foo', url_regex='bar') + + +def file_get_contents(filename): + with open(filename, "rb") as f: + return f.read() + + +def test_download_link(httpbin): + """Test downloading the contents of a link to file.""" + browser = mechanicalsoup.StatefulBrowser() + browser.open(httpbin.url) + tmpdir = tempfile.mkdtemp() + tmpfile = tmpdir + '/nosuchfile.png' + current_url = browser.get_url() + current_page = browser.get_current_page() + response = browser.download_link(file=tmpfile, link='image/png') + + # Check that the browser state has not changed + assert browser.get_url() == current_url + assert browser.get_current_page() == current_page + + # Check that the file was downloaded + assert os.path.isfile(tmpfile) + assert file_get_contents(tmpfile) == response.content + # Check that we actually downloaded a PNG file + assert response.content[:4] == b'\x89PNG' + + +def test_download_link_nofile(httpbin): + """Test downloading the contents of a link without saving it.""" + browser = mechanicalsoup.StatefulBrowser() + browser.open(httpbin.url) + current_url = browser.get_url() + current_page = browser.get_current_page() + response = browser.download_link(link='image/png') + + # Check that the browser state has not changed + assert browser.get_url() == current_url + assert browser.get_current_page() == current_page + + # Check that we actually downloaded a PNG file + assert response.content[:4] == b'\x89PNG' + + +def test_download_link_to_existing_file(httpbin): + """Test downloading the contents of a link to an existing file.""" + browser = mechanicalsoup.StatefulBrowser() + browser.open(httpbin.url) + tmpdir = tempfile.mkdtemp() + tmpfile = tmpdir + '/existing.png' + with open(tmpfile, "w") as f: + f.write("initial content") + current_url = browser.get_url() + current_page = browser.get_current_page() + response = browser.download_link('image/png', tmpfile) + + # Check that the browser state has not changed + assert browser.get_url() == current_url + assert browser.get_current_page() == current_page + + # Check that the file was downloaded + assert os.path.isfile(tmpfile) + assert file_get_contents(tmpfile) == response.content + # Check that we actually downloaded a PNG file + assert response.content[:4] == b'\x89PNG' + + +def test_download_link_404(httpbin): + """Test downloading the contents of a broken link.""" + browser = mechanicalsoup.StatefulBrowser(raise_on_404=True) + browser.open_fake_page('Link', + url=httpbin.url) + tmpdir = tempfile.mkdtemp() + tmpfile = tmpdir + '/nosuchfile.txt' + current_url = browser.get_url() + current_page = browser.get_current_page() + with pytest.raises(mechanicalsoup.LinkNotFoundError): + browser.download_link(file=tmpfile, link_text='Link') + + # Check that the browser state has not changed + assert browser.get_url() == current_url + assert browser.get_current_page() == current_page + + # Check that the file was not downloaded + assert not os.path.exists(tmpfile) + + +def test_download_link_referer(httpbin): + """Test downloading the contents of a link to file.""" + browser = mechanicalsoup.StatefulBrowser() + ref = httpbin + "/my-referer" + browser.open_fake_page('Link', + url=ref) + tmpfile = tempfile.NamedTemporaryFile() + current_url = browser.get_url() + current_page = browser.get_current_page() + browser.download_link(file=tmpfile.name, link_text='Link') + + # Check that the browser state has not changed + assert browser.get_url() == current_url + assert browser.get_current_page() == current_page + + # Check that the file was downloaded + with open(tmpfile.name) as f: + json_data = json.load(f) + headers = json_data["headers"] + assert headers["Referer"] == ref + + +def test_refresh_open(): + url = 'mock://example.com' + initial_page = BeautifulSoup('

Fake empty page

', 'lxml') + reload_page = BeautifulSoup('

Fake reloaded page

', 'lxml') + + browser, adapter = prepare_mock_browser() + mock_get(adapter, url=url, reply=str(initial_page)) + browser.open(url) + mock_get(adapter, url=url, reply=str(reload_page), + additional_matcher=lambda r: 'Referer' not in r.headers) + + browser.refresh() + + assert browser.get_url() == url + assert browser.get_current_page() == reload_page + + +def test_refresh_follow_link(): + url = 'mock://example.com' + follow_url = 'mock://example.com/followed' + initial_content = 'Link'.format(url=follow_url) + initial_page = BeautifulSoup(initial_content, 'lxml') + reload_page = BeautifulSoup('

Fake reloaded page

', 'lxml') + + browser, adapter = prepare_mock_browser() + mock_get(adapter, url=url, reply=str(initial_page)) + mock_get(adapter, url=follow_url, reply=str(initial_page)) + browser.open(url) + browser.follow_link() + refer_header = {'Referer': url} + mock_get(adapter, url=follow_url, reply=str(reload_page), + request_headers=refer_header) + + browser.refresh() + + assert browser.get_url() == follow_url + assert browser.get_current_page() == reload_page + + +def test_refresh_form_not_retained(): + url = 'mock://example.com' + initial_content = '
Here comes the form
' + initial_page = BeautifulSoup(initial_content, 'lxml') + reload_page = BeautifulSoup('

Fake reloaded page

', 'lxml') + + browser, adapter = prepare_mock_browser() + mock_get(adapter, url=url, reply=str(initial_page)) + browser.open(url) + browser.select_form() + mock_get(adapter, url=url, reply=str(reload_page), + additional_matcher=lambda r: 'Referer' not in r.headers) + + browser.refresh() + + assert browser.get_url() == url + assert browser.get_current_page() == reload_page + assert browser.get_current_form() is None + + +def test_refresh_error(): + browser = mechanicalsoup.StatefulBrowser() + + # Test no page + with pytest.raises(ValueError): + browser.refresh() + + # Test fake page + with pytest.raises(ValueError): + browser.open_fake_page('

Fake empty page

', url='http://fake.com') + browser.refresh() + + if __name__ == '__main__': pytest.main(sys.argv) diff -Nru python-mechanicalsoup-0.8.0/tests/utils.py python-mechanicalsoup-0.10.0/tests/utils.py --- python-mechanicalsoup-0.8.0/tests/utils.py 1970-01-01 00:00:00.000000000 +0000 +++ python-mechanicalsoup-0.10.0/tests/utils.py 2018-02-04 00:51:13.000000000 +0000 @@ -0,0 +1,78 @@ +import mechanicalsoup +import requests_mock +try: + from urllib.parse import parse_qsl +except ImportError: + from urlparse import parse_qsl + +""" +Utilities for testing MechanicalSoup. +""" + +choose_submit_form = ''' + + + +
+ +
+ +
+
+ + + +
+
+ + +''' + + +def setup_mock_browser(expected_post=None, text=choose_submit_form): + url = 'mock://form.com' + browser, mock = prepare_mock_browser() + mock_get(mock, url, text) + + if expected_post is not None: + mock_post(mock, url + '/post', expected_post) + + return browser, url + + +def prepare_mock_browser(scheme='mock'): + mock = requests_mock.Adapter() + browser = mechanicalsoup.StatefulBrowser(requests_adapters={scheme: mock}) + + return browser, mock + + +def mock_get(mocked_adapter, url, reply, content_type='text/html', **kwargs): + headers = {'Content-Type': content_type} + mocked_adapter.register_uri('GET', url, headers=headers, text=reply, + **kwargs) + + +def mock_post(mocked_adapter, url, expected, reply='Success!'): + def text_callback(request, context): + # Python 2's parse_qsl doesn't like None argument + query = parse_qsl(request.text) if request.text else () + assert (set(query) == set(expected)) + return reply + + mocked_adapter.register_uri('POST', url, text=text_callback) + + +class HttpbinRemote: + """Drop-in replacement for pytest-httpbin's httpbin fixture + that uses the remote httpbin server instead of a local one.""" + def __init__(self): + self.url = "http://httpbin.org" + + def __add__(self, x): + return self.url + x