diff -Nru gallery-dl-1.22.1/CHANGELOG.md gallery-dl-1.22.3/CHANGELOG.md
--- gallery-dl-1.22.1/CHANGELOG.md	2022-06-04 17:41:17.000000000 +0000
+++ gallery-dl-1.22.3/CHANGELOG.md	2022-06-28 20:42:25.000000000 +0000
@@ -1,5 +1,37 @@
 # Changelog
 
+## 1.22.3 - 2022-06-28
+### Changes
+- [twitter] revert strategy changes for user URLs ([#2712](https://github.com/mikf/gallery-dl/issues/2712), [#2710](https://github.com/mikf/gallery-dl/issues/2710))
+- update default User-Agent headers
+
+## 1.22.2 - 2022-06-27
+### Additions
+- [cyberdrop] add fallback URLs ([#2668](https://github.com/mikf/gallery-dl/issues/2668))
+- [horne] add support for horne.red ([#2700](https://github.com/mikf/gallery-dl/issues/2700))
+- [itaku] add `gallery` and `image` extractors ([#1842](https://github.com/mikf/gallery-dl/issues/1842))
+- [poipiku] add `user` and `post` extractors ([#1602](https://github.com/mikf/gallery-dl/issues/1602))
+- [skeb] add `following` extractor ([#2698](https://github.com/mikf/gallery-dl/issues/2698))
+- [twitter] implement `expand` option ([#2665](https://github.com/mikf/gallery-dl/issues/2665))
+- [twitter] implement `csrf` option ([#2676](https://github.com/mikf/gallery-dl/issues/2676))
+- [unsplash] add `collection_title` and `collection_id` metadata fields ([#2670](https://github.com/mikf/gallery-dl/issues/2670))
+- [weibo] support `tabtype=video` listings ([#2601](https://github.com/mikf/gallery-dl/issues/2601))
+- [formatter] implement slice operator as format specifier
+- support cygwin/BSD/etc for `--cookies-from-browser`
+### Fixes
+- [instagram] improve metadata generated by `_parse_post_api()` ([#2695](https://github.com/mikf/gallery-dl/issues/2695), [#2660](https://github.com/mikf/gallery-dl/issues/2660))
+- [instagram] fix `tag` extractor ([#2659](https://github.com/mikf/gallery-dl/issues/2659))
+- [instagram] automatically invalidate expired login sessions
+- [twitter] fix pagination for conversation tweets
+- [twitter] improve `"replies": "self"` ([#2665](https://github.com/mikf/gallery-dl/issues/2665))
+- [twitter] improve strategy for user URLs ([#2665](https://github.com/mikf/gallery-dl/issues/2665))
+- [vk] take URLs from `*_src` entries ([#2535](https://github.com/mikf/gallery-dl/issues/2535))
+- [weibo] fix URLs generated by `user` extractor ([#2601](https://github.com/mikf/gallery-dl/issues/2601))
+- [weibo] fix retweets ([#2601](https://github.com/mikf/gallery-dl/issues/2601))
+- [downloader:ytdl] update `_set_outtmpl()` ([#2692](https://github.com/mikf/gallery-dl/issues/2692))
+- [formatter] fix `!j` conversion for non-serializable types ([#2624](https://github.com/mikf/gallery-dl/issues/2624))
+- [snap] fix missing libslang dependency ([#2655](https://github.com/mikf/gallery-dl/issues/2655))
+
 ## 1.22.1 - 2022-06-04
 ### Additions
 - [gfycat] add support for collections ([#2629](https://github.com/mikf/gallery-dl/issues/2629))
diff -Nru gallery-dl-1.22.1/data/man/gallery-dl.1 gallery-dl-1.22.3/data/man/gallery-dl.1
--- gallery-dl-1.22.1/data/man/gallery-dl.1	2022-06-04 17:41:17.000000000 +0000
+++ gallery-dl-1.22.3/data/man/gallery-dl.1	2022-06-28 20:42:25.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2022-06-04" "1.22.1" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2022-06-28" "1.22.3" "gallery-dl Manual"
 .\" disable hyphenation
 .nh
diff -Nru gallery-dl-1.22.1/data/man/gallery-dl.conf.5 gallery-dl-1.22.3/data/man/gallery-dl.conf.5
--- gallery-dl-1.22.1/data/man/gallery-dl.conf.5	2022-06-04 17:41:17.000000000 +0000
+++ gallery-dl-1.22.3/data/man/gallery-dl.conf.5	2022-06-28 20:42:25.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2022-06-04" "1.22.1" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2022-06-28" "1.22.3" "gallery-dl Manual"
 .\" disable hyphenation
 .nh
 .\" disable justification (adjust text to left margin only)
@@ -596,7 +596,7 @@
 \f[I]string\f[]
 
 .IP "Default:" 9
-\f[I]"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:78.0) Gecko/20100101 Firefox/78.0"\f[]
+\f[I]"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0"\f[]
 
 .IP "Description:" 4
 User-Agent header value to be used for HTTP requests.
@@ -1702,6 +1702,17 @@
 Download video files.
 
 
+.SS extractor.itaku.videos
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Download video files.
+
+
 .SS extractor.kemonoparty.comments
 .IP "Type:" 6
 \f[I]bool\f[]
@@ -2585,6 +2596,22 @@
 \f[].
 
 
+.SS extractor.twitter.csrf
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"cookies"\f[]
+
+.IP "Description:" 4
+Controls how to handle Cross Site Request Forgery (CSRF) tokens.
+
+.br
+* \f[I]"auto"\f[]: Always auto-generate a token.
+.br
+* \f[I]"cookies"\f[]: Use token given by the \f[I]ct0\f[] cookie if present.
+
+
 .SS extractor.twitter.size
 .IP "Type:" 6
 \f[I]list\f[] of \f[I]strings\f[]
@@ -2831,7 +2858,7 @@
 when processing a user profile.
 
 Possible values are
-\f[I]"home"\f[], \f[I]"feed"\f[], \f[I]"videos"\f[], \f[I]"article"\f[], \f[I]"album"\f[].
+\f[I]"home"\f[], \f[I]"feed"\f[], \f[I]"videos"\f[], \f[I]"newvideo"\f[], \f[I]"article"\f[], \f[I]"album"\f[].
 
 It is possible to use \f[I]"all"\f[] instead of listing all values separately.
 
diff -Nru gallery-dl-1.22.1/debian/changelog gallery-dl-1.22.3/debian/changelog
--- gallery-dl-1.22.1/debian/changelog	2022-06-06 00:08:12.000000000 +0000
+++ gallery-dl-1.22.3/debian/changelog	2022-07-01 21:42:29.000000000 +0000
@@ -1,8 +1,14 @@
-gallery-dl (1.22.1-1~bpo20.04.1) focal-backports; urgency=medium
+gallery-dl (1.22.3-1~bpo20.04.1) focal-backports; urgency=medium
 
   * No-change backport to focal.
 
- -- Unit 193 Sun, 05 Jun 2022 20:08:12 -0400
+ -- Unit 193 Fri, 01 Jul 2022 17:42:29 -0400
+
+gallery-dl (1.22.3-1) unstable; urgency=medium
+
+  * New upstream version 1.22.3.
+ + -- Unit 193 Tue, 28 Jun 2022 19:54:50 -0400 gallery-dl (1.22.1-1) unstable; urgency=medium diff -Nru gallery-dl-1.22.1/docs/gallery-dl.conf gallery-dl-1.22.3/docs/gallery-dl.conf --- gallery-dl-1.22.1/docs/gallery-dl.conf 2022-06-04 14:04:13.000000000 +0000 +++ gallery-dl-1.22.3/docs/gallery-dl.conf 2022-06-28 19:11:29.000000000 +0000 @@ -10,7 +10,7 @@ "proxy": null, "skip": true, - "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0", + "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0", "retries": 4, "timeout": 30.0, "verify": true, diff -Nru gallery-dl-1.22.1/gallery_dl/cookies.py gallery-dl-1.22.3/gallery_dl/cookies.py --- gallery-dl-1.22.1/gallery_dl/cookies.py 2022-05-30 10:58:03.000000000 +0000 +++ gallery-dl-1.22.3/gallery_dl/cookies.py 2022-06-11 13:30:11.000000000 +0000 @@ -152,13 +152,11 @@ def _firefox_browser_directory(): - if sys.platform in ("linux", "linux2"): - return os.path.expanduser("~/.mozilla/firefox") - if sys.platform == "win32": + if sys.platform in ("win32", "cygwin"): return os.path.expandvars(R"%APPDATA%\Mozilla\Firefox\Profiles") if sys.platform == "darwin": return os.path.expanduser("~/Library/Application Support/Firefox") - raise ValueError("unsupported platform '{}'".format(sys.platform)) + return os.path.expanduser("~/.mozilla/firefox") # -------------------------------------------------------------------- @@ -277,20 +275,7 @@ # /src/+/HEAD/docs/user_data_dir.md join = os.path.join - if sys.platform in ("linux", "linux2"): - config = (os.environ.get("XDG_CONFIG_HOME") or - os.path.expanduser("~/.config")) - - browser_dir = { - "brave" : join(config, "BraveSoftware/Brave-Browser"), - "chrome" : join(config, "google-chrome"), - "chromium": join(config, "chromium"), - "edge" : join(config, "microsoft-edge"), - "opera" : join(config, "opera"), - "vivaldi" : join(config, "vivaldi"), - }[browser_name] - - elif sys.platform == "win32": + if sys.platform in ("win32", "cygwin"): appdata_local = os.path.expandvars("%LOCALAPPDATA%") appdata_roaming = os.path.expandvars("%APPDATA%") browser_dir = { @@ -315,7 +300,16 @@ }[browser_name] else: - raise ValueError("unsupported platform '{}'".format(sys.platform)) + config = (os.environ.get("XDG_CONFIG_HOME") or + os.path.expanduser("~/.config")) + browser_dir = { + "brave" : join(config, "BraveSoftware/Brave-Browser"), + "chrome" : join(config, "google-chrome"), + "chromium": join(config, "chromium"), + "edge" : join(config, "microsoft-edge"), + "opera" : join(config, "opera"), + "vivaldi" : join(config, "vivaldi"), + }[browser_name] # Linux keyring names can be determined by snooping on dbus # while opening the browser in KDE: @@ -379,16 +373,13 @@ def get_cookie_decryptor(browser_root, browser_keyring_name, *, keyring=None): - if sys.platform in ("linux", "linux2"): - return LinuxChromeCookieDecryptor( - browser_keyring_name, keyring=keyring) + if sys.platform in ("win32", "cygwin"): + return WindowsChromeCookieDecryptor(browser_root) elif sys.platform == "darwin": return MacChromeCookieDecryptor(browser_keyring_name) - elif sys.platform == "win32": - return WindowsChromeCookieDecryptor(browser_root) else: - raise NotImplementedError("Chrome cookie decryption is not supported " - "on {}".format(sys.platform)) + return LinuxChromeCookieDecryptor( + browser_keyring_name, keyring=keyring) class LinuxChromeCookieDecryptor(ChromeCookieDecryptor): diff -Nru gallery-dl-1.22.1/gallery_dl/downloader/ytdl.py 
gallery-dl-1.22.3/gallery_dl/downloader/ytdl.py --- gallery-dl-1.22.1/gallery_dl/downloader/ytdl.py 2022-05-03 10:22:33.000000000 +0000 +++ gallery-dl-1.22.3/gallery_dl/downloader/ytdl.py 2022-06-20 09:32:02.000000000 +0000 @@ -138,9 +138,14 @@ @staticmethod def _set_outtmpl(ytdl_instance, outtmpl): try: - ytdl_instance.outtmpl_dict["default"] = outtmpl + ytdl_instance._parse_outtmpl except AttributeError: - ytdl_instance.params["outtmpl"] = outtmpl + try: + ytdl_instance.outtmpl_dict["default"] = outtmpl + except AttributeError: + ytdl_instance.params["outtmpl"] = outtmpl + else: + ytdl_instance.params["outtmpl"] = {"default": outtmpl} def compatible_formats(formats): diff -Nru gallery-dl-1.22.1/gallery_dl/extractor/common.py gallery-dl-1.22.3/gallery_dl/extractor/common.py --- gallery-dl-1.22.1/gallery_dl/extractor/common.py 2022-06-04 10:36:15.000000000 +0000 +++ gallery-dl-1.22.3/gallery_dl/extractor/common.py 2022-06-28 19:11:29.000000000 +0000 @@ -256,7 +256,7 @@ else: headers["User-Agent"] = self.config("user-agent", ( "Mozilla/5.0 (Windows NT 10.0; Win64; x64; " - "rv:91.0) Gecko/20100101 Firefox/91.0")) + "rv:102.0) Gecko/20100101 Firefox/102.0")) headers["Accept"] = "*/*" headers["Accept-Language"] = "en-US,en;q=0.5" headers["Accept-Encoding"] = "gzip, deflate" @@ -713,16 +713,21 @@ HTTP_HEADERS = { "firefox": ( - ("User-Agent", "Mozilla/5.0 ({}; rv:91.0) " - "Gecko/20100101 Firefox/91.0"), + ("User-Agent", "Mozilla/5.0 ({}; rv:102.0) " + "Gecko/20100101 Firefox/102.0"), ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9," - "image/avif,*/*;q=0.8"), + "image/avif,image/webp,*/*;q=0.8"), ("Accept-Language", "en-US,en;q=0.5"), - ("Accept-Encoding", "gzip, deflate"), + ("Accept-Encoding", "gzip, deflate, br"), ("Referer", None), + ("DNT", "1"), ("Connection", "keep-alive"), ("Upgrade-Insecure-Requests", "1"), ("Cookie", None), + ("Sec-Fetch-Dest", "empty"), + ("Sec-Fetch-Mode", "no-cors"), + ("Sec-Fetch-Site", "same-origin"), + ("TE", "trailers"), ), "chrome": ( ("Upgrade-Insecure-Requests", "1"), @@ -755,8 +760,7 @@ "AES128-GCM-SHA256:" "AES256-GCM-SHA384:" "AES128-SHA:" - "AES256-SHA:" - "DES-CBC3-SHA" + "AES256-SHA" ), "chrome": ( "TLS_AES_128_GCM_SHA256:" diff -Nru gallery-dl-1.22.1/gallery_dl/extractor/cyberdrop.py gallery-dl-1.22.3/gallery_dl/extractor/cyberdrop.py --- gallery-dl-1.22.1/gallery_dl/extractor/cyberdrop.py 2022-05-30 10:58:03.000000000 +0000 +++ gallery-dl-1.22.3/gallery_dl/extractor/cyberdrop.py 2022-06-12 16:41:54.000000000 +0000 @@ -48,10 +48,11 @@ files = [] append = files.append while True: - url = extr('id="file" href="', '"') + url = text.unescape(extr('id="file" href="', '"')) if not url: break - append({"file": text.unescape(url)}) + append({"file": url, + "_fallback": (self.root + url[url.find("/", 8):],)}) return files, { "album_id" : self.album_id, diff -Nru gallery-dl-1.22.1/gallery_dl/extractor/__init__.py gallery-dl-1.22.3/gallery_dl/extractor/__init__.py --- gallery-dl-1.22.1/gallery_dl/extractor/__init__.py 2022-05-30 10:58:03.000000000 +0000 +++ gallery-dl-1.22.3/gallery_dl/extractor/__init__.py 2022-06-20 09:32:02.000000000 +0000 @@ -64,6 +64,7 @@ "inkbunny", "instagram", "issuu", + "itaku", "kabeuchi", "keenspot", "kemonoparty", @@ -106,6 +107,7 @@ "pixiv", "pixnet", "plurk", + "poipiku", "pornhub", "pururin", "reactor", diff -Nru gallery-dl-1.22.1/gallery_dl/extractor/instagram.py gallery-dl-1.22.3/gallery_dl/extractor/instagram.py --- gallery-dl-1.22.1/gallery_dl/extractor/instagram.py 2022-06-03 16:58:20.000000000 +0000 +++ 
gallery-dl-1.22.3/gallery_dl/extractor/instagram.py 2022-06-24 21:41:18.000000000 +0000 @@ -82,8 +82,12 @@ if response.history: - url = response.request.url + url = response.url if "/accounts/login/" in url: + if self._username: + self.log.debug("Invalidating cached login session for " + "'%s'", self._username) + _login_impl.invalidate(self._username) page = "login" elif "/challenge/" in url: page = "challenge" @@ -161,55 +165,15 @@ return self._pagination_api(endpoint) def login(self): + self._username = None if not self._check_cookies(self.cookienames): username, password = self._get_auth_info() if username: - self._update_cookies(self._login_impl(username, password)) + self._username = username + self._update_cookies(_login_impl(self, username, password)) self.session.cookies.set( "csrftoken", self.csrf_token, domain=self.cookiedomain) - @cache(maxage=360*24*3600, keyarg=1) - def _login_impl(self, username, password): - self.log.info("Logging in as %s", username) - - url = self.root + "/accounts/login/" - page = self.request(url).text - - headers = { - "X-Web-Device-Id" : text.extract(page, '"device_id":"', '"')[0], - "X-IG-App-ID" : "936619743392459", - "X-ASBD-ID" : "437806", - "X-IG-WWW-Claim" : "0", - "X-Requested-With": "XMLHttpRequest", - "Referer" : url, - } - url = self.root + "/data/shared_data/" - data = self.request(url, headers=headers).json() - - headers["X-CSRFToken"] = data["config"]["csrf_token"] - headers["X-Instagram-AJAX"] = data["rollout_hash"] - headers["Origin"] = self.root - data = { - "username" : username, - "enc_password" : "#PWD_INSTAGRAM_BROWSER:0:{}:{}".format( - int(time.time()), password), - "queryParams" : "{}", - "optIntoOneTap" : "false", - "stopDeletionNonce" : "", - "trustedDeviceRecords": "{}", - } - url = self.root + "/accounts/login/ajax/" - response = self.request(url, method="POST", headers=headers, data=data) - - if not response.json().get("authenticated"): - raise exception.AuthenticationError() - - cget = self.session.cookies.get - return { - name: cget(name) - for name in ("sessionid", "mid", "ig_did") - } - def _parse_post_graphql(self, post): typename = post["__typename"] @@ -286,37 +250,51 @@ return data def _parse_post_api(self, post): - - if "media" in post: - media = post["media"] - owner = media["user"] + if "items" in post: + items = post["items"] + reel_id = str(post["id"]).rpartition(":")[2] data = { - "post_id" : media["pk"], - "post_shortcode": shortcode_from_id(media["pk"]), + "expires": text.parse_timestamp(post.get("expiring_at")), + "post_id": reel_id, + "post_shortcode": shortcode_from_id(reel_id), } + else: + data = { + "post_id" : post["pk"], + "post_shortcode": post["code"], + "likes": post["like_count"], + } + + caption = post["caption"] + data["description"] = caption["text"] if caption else "" + + tags = self._find_tags(data["description"]) + if tags: + data["tags"] = sorted(set(tags)) + + location = post.get("location") + if location: + slug = location["short_name"].replace(" ", "-").lower() + data["location_id"] = location["pk"] + data["location_slug"] = slug + data["location_url"] = "{}/explore/locations/{}/{}/".format( + self.root, location["pk"], slug) - if "carousel_media" in media: - post["items"] = media["carousel_media"] + if "carousel_media" in post: + items = post["carousel_media"] data["sidecar_media_id"] = data["post_id"] data["sidecar_shortcode"] = data["post_shortcode"] else: - post["items"] = (media,) - - else: - reel_id = str(post["id"]).rpartition(":")[2] - owner = post["user"] - data = { - "expires" : 
text.parse_timestamp(post.get("expiring_at")), - "post_id" : reel_id, - "post_shortcode": shortcode_from_id(reel_id), - } + items = (post,) + owner = post["user"] data["owner_id"] = owner["pk"] data["username"] = owner.get("username") data["fullname"] = owner.get("full_name") - data["_files"] = files = [] + data["post_url"] = "{}/p/{}/".format(self.root, data["post_shortcode"]) - for num, item in enumerate(post["items"], 1): + data["_files"] = files = [] + for num, item in enumerate(items, 1): image = item["image_versions2"]["candidates"][0] @@ -333,7 +311,8 @@ media = { "num" : num, "date" : text.parse_timestamp(item.get("taken_at") or - media.get("taken_at")), + media.get("taken_at") or + post.get("taken_at")), "media_id" : item["pk"], "shortcode" : (item.get("code") or shortcode_from_id(item["pk"])), @@ -342,6 +321,10 @@ "width" : media["width"], "height" : media["height"], } + + if "expiring_at" in item: + media["expires"] = text.parse_timestamp(post["expiring_at"]) + self._extract_tagged_users(item, media) files.append(media) @@ -385,31 +368,6 @@ "username" : user["username"], "full_name": user["full_name"]}) - def _extract_shared_data(self, page): - shared_data, pos = text.extract( - page, "window._sharedData =", ";") - additional_data, pos = text.extract( - page, "window.__additionalDataLoaded(", ");", pos) - - data = json.loads(shared_data) - if additional_data: - next(iter(data["entry_data"].values()))[0] = \ - json.loads(additional_data.partition(",")[2]) - return data - - def _get_edge_data(self, user, key): - cursor = self.config("cursor") - if cursor or not key: - return { - "edges" : (), - "page_info": { - "end_cursor" : cursor, - "has_next_page": True, - "_virtual" : True, - }, - } - return user[key] - def _pagination_graphql(self, query_hash, variables): cursor = self.config("cursor") if cursor: @@ -436,8 +394,7 @@ def _pagination_api(self, endpoint, params=None): while True: data = self._request_api(endpoint, params=params) - for item in data["items"]: - yield {"media": item} + yield from data["items"] if not data["more_available"]: return @@ -446,7 +403,8 @@ def _pagination_api_post(self, endpoint, params, post=False): while True: data = self._request_api(endpoint, method="POST", data=params) - yield from data["items"] + for item in data["items"]: + yield item["media"] info = data["paging_info"] if not info["more_available"]: @@ -567,21 +525,7 @@ return {"tag": text.unquote(self.item)} def posts(self): - url = "{}/explore/tags/{}/".format(self.root, self.item) - page = self._extract_shared_data( - self.request(url).text)["entry_data"]["TagPage"][0] - - if "data" in page: - return self._pagination_sections(page["data"]["recent"]) - - hashtag = page["graphql"]["hashtag"] - query_hash = "9b498c08113f1e09617a1703c22b2f32" - variables = {"tag_name": hashtag["name"], "first": 50} - edge = self._get_edge_data(hashtag, "edge_hashtag_to_media") - return self._pagination_graphql(query_hash, variables, edge) - - def _pagination_sections(self, info): - endpoint = "/v1/tags/instagram/sections/" + endpoint = "/v1/tags/{}/sections/".format(self.item) data = { "include_persistent": "0", "max_id" : None, @@ -591,29 +535,17 @@ } while True: + info = self._request_api(endpoint, method="POST", data=data) + for section in info["sections"]: - yield from section["layout_content"]["medias"] + for media in section["layout_content"]["medias"]: + yield media["media"] if not info.get("more_available"): return data["max_id"] = info["next_max_id"] data["page"] = info["next_page"] - info = 
self._request_api(endpoint, method="POST", data=data) - - def _pagination_graphql(self, query_hash, variables, data): - while True: - for edge in data["edges"]: - yield edge["node"] - - info = data["page_info"] - if not info["has_next_page"]: - return - - variables["after"] = self._cursor = info["end_cursor"] - self.log.debug("Cursor: %s", self._cursor) - data = self._request_graphql( - query_hash, variables)["hashtag"]["edge_hashtag_to_media"] class InstagramPostExtractor(InstagramExtractor): @@ -812,6 +744,49 @@ return self._pagination_api_post(endpoint, data) +@cache(maxage=360*24*3600, keyarg=1) +def _login_impl(extr, username, password): + extr.log.info("Logging in as %s", username) + + url = extr.root + "/accounts/login/" + page = extr.request(url).text + + headers = { + "X-Web-Device-Id" : text.extract(page, '"device_id":"', '"')[0], + "X-IG-App-ID" : "936619743392459", + "X-ASBD-ID" : "437806", + "X-IG-WWW-Claim" : "0", + "X-Requested-With": "XMLHttpRequest", + "Referer" : url, + } + url = extr.root + "/data/shared_data/" + data = extr.request(url, headers=headers).json() + + headers["X-CSRFToken"] = data["config"]["csrf_token"] + headers["X-Instagram-AJAX"] = data["rollout_hash"] + headers["Origin"] = extr.root + data = { + "username" : username, + "enc_password" : "#PWD_INSTAGRAM_BROWSER:0:{}:{}".format( + int(time.time()), password), + "queryParams" : "{}", + "optIntoOneTap" : "false", + "stopDeletionNonce" : "", + "trustedDeviceRecords": "{}", + } + url = extr.root + "/accounts/login/ajax/" + response = extr.request(url, method="POST", headers=headers, data=data) + + if not response.json().get("authenticated"): + raise exception.AuthenticationError() + + cget = extr.session.cookies.get + return { + name: cget(name) + for name in ("sessionid", "mid", "ig_did") + } + + def id_from_shortcode(shortcode): return util.bdecode(shortcode, _ALPHABET) diff -Nru gallery-dl-1.22.1/gallery_dl/extractor/itaku.py gallery-dl-1.22.3/gallery_dl/extractor/itaku.py --- gallery-dl-1.22.1/gallery_dl/extractor/itaku.py 1970-01-01 00:00:00.000000000 +0000 +++ gallery-dl-1.22.3/gallery_dl/extractor/itaku.py 2022-06-20 17:47:47.000000000 +0000 @@ -0,0 +1,183 @@ +# -*- coding: utf-8 -*- + +# Copyright 2022 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://itaku.ee/""" + +from .common import Extractor, Message +from ..cache import memcache +from .. import text + +BASE_PATTERN = r"(?:https?://)?itaku\.ee" + + +class ItakuExtractor(Extractor): + """Base class for itaku extractors""" + category = "itaku" + root = "https://itaku.ee" + directory_fmt = ("{category}", "{owner_username}") + filename_fmt = ("{id}{title:? 
//}.{extension}") + archive_fmt = "{id}" + request_interval = (0.5, 1.5) + + def __init__(self, match): + Extractor.__init__(self, match) + self.api = ItakuAPI(self) + self.item = match.group(1) + self.videos = self.config("videos", True) + + def items(self): + for post in self.posts(): + + post["date"] = text.parse_datetime( + post["date_added"], "%Y-%m-%dT%H:%M:%S.%f") + for category, tags in post.pop("categorized_tags").items(): + post["tags_" + category.lower()] = [t["name"] for t in tags] + post["tags"] = [t["name"] for t in post["tags"]] + post["sections"] = [s["title"] for s in post["sections"]] + + if post["video"] and self.videos: + url = post["video"]["video"] + else: + url = post["image"] + + yield Message.Directory, post + yield Message.Url, url, text.nameext_from_url(url, post) + + +class ItakuGalleryExtractor(ItakuExtractor): + """Extractor for posts from an itaku user gallery""" + subcategory = "gallery" + pattern = BASE_PATTERN + r"/profile/([^/?#]+)/gallery" + test = ("https://itaku.ee/profile/piku/gallery", { + "pattern": r"https://d1wmr8tlk3viaj\.cloudfront\.net/gallery_imgs" + r"/[^/?#]+\.(jpg|png|gif)", + "range": "1-10", + "count": 10, + }) + + def posts(self): + return self.api.galleries_images(self.item) + + +class ItakuImageExtractor(ItakuExtractor): + subcategory = "image" + pattern = BASE_PATTERN + r"/images/(\d+)" + test = ( + ("https://itaku.ee/images/100471", { + "pattern": r"https://d1wmr8tlk3viaj\.cloudfront\.net/gallery_imgs" + r"/220504_oUNIAFT\.png", + "count": 1, + "keyword": { + "already_pinned": None, + "blacklisted": { + "blacklisted_tags": [], + "is_blacklisted": False + }, + "can_reshare": True, + "date_added": "2022-05-05T19:21:17.674148Z", + "date_edited": "2022-05-25T14:37:46.220612Z", + "description": "sketch from drawpile", + "extension": "png", + "filename": "220504_oUNIAFT", + "hotness_score": 11507.4691939, + "id": 100471, + "image": "https://d1wmr8tlk3viaj.cloudfront.net/gallery_imgs" + "/220504_oUNIAFT.png", + "image_xl": "https://d1wmr8tlk3viaj.cloudfront.net" + "/gallery_imgs/220504_oUNIAFT/xl.jpg", + "liked_by_you": False, + "maturity_rating": "SFW", + "num_comments": 2, + "num_likes": 80, + "num_reshares": 2, + "obj_tags": 136446, + "owner": 16775, + "owner_avatar": "https://d1wmr8tlk3viaj.cloudfront.net" + "/profile_pics/av2022r_vKYVywc/sm.jpg", + "owner_displayname": "Piku", + "owner_username": "piku", + "reshared_by_you": False, + "sections": ["Miku"], + "tags": list, + "tags_character": ["hatsune_miku"], + "tags_copyright": ["vocaloid"], + "tags_general" : ["twintails", "green_hair", "flag", "gloves", + "green_eyes", "female", "racing_miku"], + "title": "Racing Miku 2022 Ver.", + "too_mature": False, + "uncompressed_filesize": "0.62", + "video": None, + "visibility": "PUBLIC", + }, + }), + # video + ("https://itaku.ee/images/19465", { + "pattern": r"https://d1wmr8tlk3viaj\.cloudfront\.net/gallery_vids" + r"/sleepy_af_OY5GHWw\.mp4", + }), + ) + + def posts(self): + return (self.api.image(self.item),) + + +class ItakuAPI(): + + def __init__(self, extractor): + self.extractor = extractor + self.root = extractor.root + "/api" + self.headers = { + "Accept": "application/json, text/plain, */*", + "Referer": extractor.root + "/", + } + + def galleries_images(self, username, section=None): + endpoint = "/galleries/images/" + params = { + "cursor" : None, + "owner" : self.user(username)["owner"], + "section" : section, + "date_range": "", + "maturity_rating": ("SFW", "Questionable", "NSFW", "Extreme"), + "ordering" : "-date_added", + "page" 
: "1", + "page_size" : "30", + "visibility": ("PUBLIC", "PROFILE_ONLY"), + } + return self._pagination(endpoint, params, self.image) + + def image(self, image_id): + endpoint = "/galleries/images/" + str(image_id) + return self._call(endpoint) + + @memcache() + def user(self, username): + return self._call("/user_profiles/{}/".format(username)) + + def _call(self, endpoint, params=None): + if not endpoint.startswith("http"): + endpoint = self.root + endpoint + response = self.extractor.request( + endpoint, params=params, headers=self.headers) + return response.json() + + def _pagination(self, endpoint, params, extend): + data = self._call(endpoint, params) + + while True: + if extend: + for result in data["results"]: + yield extend(result["id"]) + else: + yield from data["results"] + + url_next = data["links"].get("next") + if not url_next: + return + + data = self._call(url_next) diff -Nru gallery-dl-1.22.1/gallery_dl/extractor/lolisafe.py gallery-dl-1.22.3/gallery_dl/extractor/lolisafe.py --- gallery-dl-1.22.1/gallery_dl/extractor/lolisafe.py 2022-06-01 09:28:47.000000000 +0000 +++ gallery-dl-1.22.3/gallery_dl/extractor/lolisafe.py 2022-06-13 12:07:19.000000000 +0000 @@ -85,6 +85,8 @@ yield Message.Directory, data for data["num"], file in enumerate(files, 1): url = file["file"] + if "_fallback" in file: + data["_fallback"] = file["_fallback"] text.nameext_from_url(url, data) data["name"], sep, data["id"] = data["filename"].rpartition("-") diff -Nru gallery-dl-1.22.1/gallery_dl/extractor/nijie.py gallery-dl-1.22.3/gallery_dl/extractor/nijie.py --- gallery-dl-1.22.1/gallery_dl/extractor/nijie.py 2022-05-30 10:58:03.000000000 +0000 +++ gallery-dl-1.22.3/gallery_dl/extractor/nijie.py 2022-06-25 14:51:40.000000000 +0000 @@ -6,31 +6,31 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extractors for https://nijie.info/""" +"""Extractors for nijie instances""" -from .common import Extractor, Message, AsynchronousMixin +from .common import BaseExtractor, Message, AsynchronousMixin from .. import text, exception from ..cache import cache -BASE_PATTERN = r"(?:https?://)?(?:www\.)?nijie\.info" - - -class NijieExtractor(AsynchronousMixin, Extractor): +class NijieExtractor(AsynchronousMixin, BaseExtractor): """Base class for nijie extractors""" - category = "nijie" + basecategory = "Nijie" directory_fmt = ("{category}", "{user_id}") filename_fmt = "{image_id}_p{num}.{extension}" archive_fmt = "{image_id}_{num}" - cookiedomain = "nijie.info" - cookienames = ("nemail", "nlogin") - root = "https://nijie.info" - view_url = "https://nijie.info/view.php?id=" - popup_url = "https://nijie.info/view_popup.php?id=" def __init__(self, match): - Extractor.__init__(self, match) - self.user_id = text.parse_int(match.group(1)) + self._init_category(match) + self.cookiedomain = "." 
+ self.root.rpartition("/")[2] + self.cookienames = (self.category + "_tok",) + + if self.category == "horne": + self._extract_data = self._extract_data_horne + + BaseExtractor.__init__(self, match) + + self.user_id = text.parse_int(match.group(match.lastindex)) self.user_name = None self.session.headers["Referer"] = self.root + "/" @@ -39,13 +39,21 @@ for image_id in self.image_ids(): - response = self.request(self.view_url + image_id, fatal=False) + url = "{}/view.php?id={}".format(self.root, image_id) + response = self.request(url, fatal=False) if response.status_code >= 400: continue page = response.text data = self._extract_data(page) data["image_id"] = text.parse_int(image_id) + + if self.user_name: + data["user_id"] = self.user_id + data["user_name"] = self.user_name + else: + data["user_id"] = data["artist_id"] + data["user_name"] = data["artist_name"] yield Message.Directory, data for image in self._extract_images(page): @@ -68,24 +76,41 @@ "description": text.unescape(extr( '"description": "', '"').replace("&", "&")), "date" : text.parse_datetime(extr( - '"datePublished": "', '"') + "+0900", - "%a %b %d %H:%M:%S %Y%z"), - "artist_id" : text.parse_int(extr( - '"sameAs": "https://nijie.info/members.php?id=', '"')), + '"datePublished": "', '"'), "%a %b %d %H:%M:%S %Y", 9), + "artist_id" : text.parse_int(extr('/members.php?id=', '"')), + "artist_name": keywords[1], + "tags" : keywords[2:-1], + } + return data + + @staticmethod + def _extract_data_horne(page): + """Extract image metadata from 'page'""" + extr = text.extract_from(page) + keywords = text.unescape(extr( + 'name="keywords" content="', '" />')).split(",") + data = { + "title" : keywords[0].strip(), + "description": text.unescape(extr( + 'property="og:description" content="', '"')), + "artist_id" : text.parse_int(extr('members.php?id=', '"')), "artist_name": keywords[1], "tags" : keywords[2:-1], + "date" : text.parse_datetime(extr( + "itemprop='datePublished' content=", "<").rpartition(">")[2], + "%Y-%m-%d %H:%M:%S", 9), } - data["user_id"] = data["artist_id"] - data["user_name"] = data["artist_name"] return data @staticmethod def _extract_images(page): """Extract image URLs from 'page'""" - images = text.extract_iter(page, '") for num, image in enumerate(images): - url = "https:" + text.extract(image, 'src="', '"')[0] - url = url.replace("/__rs_l120x120/", "/") + src = text.extract(image, 'src="', '"')[0] + if not src: + continue + url = ("https:" + src).replace("/__rs_l120x120/", "/") yield text.nameext_from_url(url, { "num": num, "url": url, @@ -112,7 +137,7 @@ data = {"email": username, "password": password, "save": "on"} response = self.request(url, method="POST", data=data) - if "//nijie.info/login.php" in response.text: + if "/login.php" in response.text: raise exception.AuthenticationError() return self.session.cookies @@ -132,12 +157,27 @@ params["p"] += 1 +BASE_PATTERN = NijieExtractor.update({ + "nijie": { + "root": "https://nijie.info", + "pattern": r"(?:www\.)?nijie\.info", + }, + "horne": { + "root": "https://horne.red", + "pattern": r"(?:www\.)?horne\.red", + }, +}) + + class NijieUserExtractor(NijieExtractor): """Extractor for nijie user profiles""" subcategory = "user" cookiedomain = None pattern = BASE_PATTERN + r"/members\.php\?id=(\d+)" - test = ("https://nijie.info/members.php?id=44",) + test = ( + ("https://nijie.info/members.php?id=44"), + ("https://horne.red/members.php?id=58000"), + ) def items(self): fmt = "{}/{{}}.php?id={}".format(self.root, self.user_id).format @@ -172,6 +212,25 @@ "user_name": 
"ED", }, }), + ("https://horne.red/members_illust.php?id=58000", { + "pattern": r"https://pic\.nijie\.net/\d+/horne/\d+/\d+/\d+" + r"/illust/\d+_\d+_[0-9a-f]+_[0-9a-f]+\.png", + "range": "1-20", + "count": 20, + "keyword": { + "artist_id": 58000, + "artist_name": "のえるわ", + "date": "type:datetime", + "description": str, + "image_id": int, + "num": int, + "tags": list, + "title": str, + "url": str, + "user_id": 58000, + "user_name": "のえるわ", + }, + }), ("https://nijie.info/members_illust.php?id=43", { "exception": exception.NotFoundError, }), @@ -182,34 +241,47 @@ class NijieDoujinExtractor(NijieExtractor): - """Extractor for doujin entries of a nijie-user""" + """Extractor for doujin entries of a nijie user""" subcategory = "doujin" pattern = BASE_PATTERN + r"/members_dojin\.php\?id=(\d+)" - test = ("https://nijie.info/members_dojin.php?id=6782", { - "count": ">= 18", - "keyword": { - "user_id" : 6782, - "user_name": "ジョニー@アビオン村", - }, - }) + test = ( + ("https://nijie.info/members_dojin.php?id=6782", { + "count": ">= 18", + "keyword": { + "user_id" : 6782, + "user_name": "ジョニー@アビオン村", + }, + }), + ("https://horne.red/members_dojin.php?id=58000"), + ) def image_ids(self): return self._pagination("members_dojin") class NijieFavoriteExtractor(NijieExtractor): - """Extractor for all favorites/bookmarks of a nijie-user""" + """Extractor for all favorites/bookmarks of a nijie user""" subcategory = "favorite" directory_fmt = ("{category}", "bookmarks", "{user_id}") archive_fmt = "f_{user_id}_{image_id}_{num}" pattern = BASE_PATTERN + r"/user_like_illust_view\.php\?id=(\d+)" - test = ("https://nijie.info/user_like_illust_view.php?id=44", { - "count": ">= 16", - "keyword": { - "user_id" : 44, - "user_name": "ED", - }, - }) + test = ( + ("https://nijie.info/user_like_illust_view.php?id=44", { + "count": ">= 16", + "keyword": { + "user_id" : 44, + "user_name": "ED", + }, + }), + ("https://horne.red/user_like_illust_view.php?id=58000", { + "range": "1-5", + "count": 5, + "keyword": { + "user_id" : 58000, + "user_name": "のえるわ", + }, + }), + ) def image_ids(self): return self._pagination("user_like_illust_view") @@ -227,14 +299,17 @@ directory_fmt = ("{category}", "nuita", "{user_id}") archive_fmt = "n_{user_id}_{image_id}_{num}" pattern = BASE_PATTERN + r"/history_nuita\.php\?id=(\d+)" - test = ("https://nijie.info/history_nuita.php?id=728995", { - "range": "1-10", - "count": 10, - "keyword": { - "user_id" : 728995, - "user_name": "莚", - }, - }) + test = ( + ("https://nijie.info/history_nuita.php?id=728995", { + "range": "1-10", + "count": 10, + "keyword": { + "user_id" : 728995, + "user_name": "莚", + }, + }), + ("https://horne.red/history_nuita.php?id=58000"), + ) def image_ids(self): return self._pagination("history_nuita") @@ -252,7 +327,7 @@ class NijieImageExtractor(NijieExtractor): - """Extractor for a work/image from nijie.info""" + """Extractor for a nijie work/image""" subcategory = "image" pattern = BASE_PATTERN + r"/view(?:_popup)?\.php\?id=(\d+)" test = ( @@ -265,11 +340,26 @@ "count": 0, }), ("https://nijie.info/view_popup.php?id=70720"), + ("https://horne.red/view.php?id=8716", { + "count": 4, + "keyword": { + "artist_id": 58000, + "artist_name": "のえるわ", + "date": "dt:2018-02-04 14:47:24", + "description": "ノエル「そんなことしなくても、" + "言ってくれたら咥えるのに・・・♡」", + "image_id": 8716, + "tags": ["男の娘", "フェラ", "オリキャラ", "うちのこ"], + "title": "ノエル「いまどきそんな、恵方巻ネタなんてやらなくても・・・」", + "user_id": 58000, + "user_name": "のえるわ", + }, + }), ) def __init__(self, match): NijieExtractor.__init__(self, match) - self.image_id = 
match.group(1) + self.image_id = match.group(match.lastindex) def image_ids(self): return (self.image_id,) diff -Nru gallery-dl-1.22.1/gallery_dl/extractor/poipiku.py gallery-dl-1.22.3/gallery_dl/extractor/poipiku.py --- gallery-dl-1.22.1/gallery_dl/extractor/poipiku.py 1970-01-01 00:00:00.000000000 +0000 +++ gallery-dl-1.22.3/gallery_dl/extractor/poipiku.py 2022-06-22 10:49:37.000000000 +0000 @@ -0,0 +1,169 @@ +# -*- coding: utf-8 -*- + +# Copyright 2022 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://poipiku.com/""" + +from .common import Extractor, Message +from .. import text + +BASE_PATTERN = r"(?:https?://)?poipiku\.com" + + +class PoipikuExtractor(Extractor): + """Base class for poipiku extractors""" + category = "poipiku" + root = "https://poipiku.com" + directory_fmt = ("{category}", "{user_id} {user_name}") + filename_fmt = "{post_id}_{num}.{extension}" + archive_fmt = "{post_id}_{num}" + request_interval = (0.5, 1.5) + + def items(self): + password = self.config("password", "") + + for post_url in self.posts(): + parts = post_url.split("/") + if post_url[0] == "/": + post_url = self.root + post_url + page = self.request(post_url).text + extr = text.extract_from(page) + + post = { + "post_category": extr("[", "]"), + "count" : extr("(", " "), + "post_id" : parts[-1].partition(".")[0], + "user_id" : parts[-2], + "user_name" : text.unescape(extr( + '<h2 class="UserInfoUserName">', '</').rpartition(">")[2]), + "description": text.unescape(extr( + 'class="IllustItemDesc" >', '<')), + } + + yield Message.Directory, post + post["num"] = 0 + + while True: + thumb = extr('class="IllustItemThumbImg" src="', '"') + if not thumb: + break + elif thumb.startswith("/img/"): + continue + post["num"] += 1 + url = text.ensure_http_scheme(thumb[:-8]) + yield Message.Url, url, text.nameext_from_url(url, post) + + if not extr('</i> show all', '<'): + continue + + url = self.root + "/f/ShowAppendFileF.jsp" + headers = { + "Accept" : "application/json, text/javascript, */*; q=0.01", + "X-Requested-With": "XMLHttpRequest", + "Origin" : self.root, + "Referer": post_url, + } + data = { + "UID": post["user_id"], + "IID": post["post_id"], + "PAS": password, + "MD" : "0", + "TWF": "-1", + } + page = self.request( + url, method="POST", headers=headers, data=data).json()["html"] + + for thumb in text.extract_iter( + page, 'class="IllustItemThumbImg" src="', '"'): + post["num"] += 1 + url = text.ensure_http_scheme(thumb[:-8]) + yield Message.Url, url, text.nameext_from_url(url, post) + + +class PoipikuUserExtractor(PoipikuExtractor): + """Extractor for posts from a poipiku user""" + subcategory = "user" + pattern = (BASE_PATTERN + r"/(?:IllustListPcV\.jsp\?PG=(\d+)&ID=)?" 
+ r"(\d+)/?(?:$|[?&#])") + test = ( + ("https://poipiku.com/25049/", { + "pattern": r"https://img\.poipiku\.com/user_img\d+/000025049" + r"/\d+_\w+\.(jpe?g|png)$", + "range": "1-10", + "count": 10, + }), + ("https://poipiku.com/IllustListPcV.jsp?PG=1&ID=25049&KWD=") + ) + + def __init__(self, match): + PoipikuExtractor.__init__(self, match) + self._page, self.user_id = match.groups() + + def posts(self): + url = self.root + "/IllustListPcV.jsp" + params = { + "PG" : text.parse_int(self._page, 0), + "ID" : self.user_id, + "KWD": "", + } + + while True: + page = self.request(url, params=params).text + + cnt = 0 + for path in text.extract_iter( + page, 'class="IllustInfo" href="', '"'): + yield path + cnt += 1 + + if cnt < 48: + return + params["PG"] += 1 + + +class PoipikuPostExtractor(PoipikuExtractor): + """Extractor for a poipiku post""" + subcategory = "post" + pattern = BASE_PATTERN + r"/(\d+)/(\d+)" + test = ( + ("https://poipiku.com/25049/5864576.html", { + "pattern": r"https://img\.poipiku\.com/user_img03/000025049" + r"/005864576_EWN1Y65gQ\.png$", + "keyword": { + "count": "1", + "description": "", + "extension": "png", + "filename": "005864576_EWN1Y65gQ", + "num": 1, + "post_category": "DOODLE", + "post_id": "5864576", + "user_id": "25049", + "user_name": "ユキウサギ", + }, + }), + ("https://poipiku.com/2166245/6411749.html", { + "pattern": r"https://img\.poipiku\.com/user_img01/002166245" + r"/006411749_\w+\.jpeg$", + "count": 4, + "keyword": { + "count": "4", + "description": "絵茶の産物ネタバレあるやつ", + "num": int, + "post_category": "SPOILER", + "post_id": "6411749", + "user_id": "2166245", + "user_name": "wadahito", + }, + }), + ) + + def __init__(self, match): + PoipikuExtractor.__init__(self, match) + self.user_id, self.post_id = match.groups() + + def posts(self): + return ("/{}/{}.html".format(self.user_id, self.post_id),) diff -Nru gallery-dl-1.22.1/gallery_dl/extractor/readcomiconline.py gallery-dl-1.22.3/gallery_dl/extractor/readcomiconline.py --- gallery-dl-1.22.1/gallery_dl/extractor/readcomiconline.py 2022-05-31 10:15:23.000000000 +0000 +++ gallery-dl-1.22.3/gallery_dl/extractor/readcomiconline.py 2022-06-11 13:08:16.000000000 +0000 @@ -130,12 +130,13 @@ def beau(url): """https://readcomiconline.li/Scripts/rguard.min.js""" - if url.startswith("https"): - return url - url = url.replace("_x236", "d") url = url.replace("_x945", "g") + if url.startswith("https"): + return url + + url, sep, rest = url.partition("?") containsS0 = "=s0" in url url = url[:-3 if containsS0 else -6] url = url[4:22] + url[25:] @@ -143,4 +144,4 @@ url = binascii.a2b_base64(url).decode() url = url[0:13] + url[17:] url = url[0:-2] + ("=s0" if containsS0 else "=s1600") - return "https://2.bp.blogspot.com/" + url + return "https://2.bp.blogspot.com/" + url + sep + rest diff -Nru gallery-dl-1.22.1/gallery_dl/extractor/skeb.py gallery-dl-1.22.3/gallery_dl/extractor/skeb.py --- gallery-dl-1.22.1/gallery_dl/extractor/skeb.py 2022-05-30 10:58:03.000000000 +0000 +++ gallery-dl-1.22.3/gallery_dl/extractor/skeb.py 2022-06-27 09:31:27.000000000 +0000 @@ -135,10 +135,11 @@ "body": "re:はじめまして。私はYouTubeにてVTuberとして活動をしている湊ラ", "client": { "avatar_url": "https://pbs.twimg.com/profile_images" - "/1471184042791895042/f0DcWFGl.jpg", - "header_url": None, + "/1537488326697287680/yNUbLDgC.jpg", + "header_url": "https://pbs.twimg.com/profile_banners" + "/1375007870291300358/1655744756/1500x500", "id": 1196514, - "name": "湊ラギ", + "name": "湊ラギ♦️🎀Vtuber🎀次回6/23予定", "screen_name": "minato_ragi", }, "completed_at": 
"2022-02-27T14:03:45.442Z", @@ -208,3 +209,30 @@ posts = itertools.chain(posts, self._pagination(url, params)) return posts + + +class SkebFollowingExtractor(SkebExtractor): + """Extractor for all creators followed by a skeb user""" + subcategory = "following" + pattern = r"(?:https?://)?skeb\.jp/@([^/?#]+)/following_creators" + test = ("https://skeb.jp/@user/following_creators",) + + def items(self): + for user in self.users(): + url = "{}/@{}".format(self.root, user["screen_name"]) + user["_extractor"] = SkebUserExtractor + yield Message.Queue, url, user + + def users(self): + url = "{}/api/users/{}/following_creators".format( + self.root, self.user_name) + headers = {"Referer": self.root, "Authorization": "Bearer null"} + params = {"sort": "date", "offset": 0, "limit": 90} + + while True: + data = self.request(url, params=params, headers=headers).json() + yield from data + + if len(data) < params["limit"]: + return + params["offset"] += params["limit"] diff -Nru gallery-dl-1.22.1/gallery_dl/extractor/twitter.py gallery-dl-1.22.3/gallery_dl/extractor/twitter.py --- gallery-dl-1.22.1/gallery_dl/extractor/twitter.py 2022-05-30 10:58:03.000000000 +0000 +++ gallery-dl-1.22.3/gallery_dl/extractor/twitter.py 2022-06-28 19:11:29.000000000 +0000 @@ -40,6 +40,7 @@ self.quoted = self.config("quoted", False) self.videos = self.config("videos", True) self.cards = self.config("cards", False) + self._user_id = None self._user_cache = {} self._init_sizes() @@ -59,6 +60,10 @@ self.api = TwitterAPI(self) metadata = self.metadata() + if self.config("expand"): + tweets = self._expand_tweets(self.tweets()) + self.tweets = lambda : tweets + for tweet in self.tweets(): if "legacy" in tweet: @@ -75,7 +80,8 @@ if "in_reply_to_user_id_str" in data and ( not self.replies or ( self.replies == "self" and - data["in_reply_to_user_id_str"] != data["user_id_str"] + (self._user_id or data["in_reply_to_user_id_str"]) != + data["user_id_str"] ) ): self.log.debug("Skipping %s (reply)", data["id_str"]) @@ -338,6 +344,22 @@ user["_extractor"] = cls yield Message.Queue, fmt(user), user + def _expand_tweets(self, tweets): + seen = set() + for tweet in tweets: + + if "legacy" in tweet: + cid = tweet["legacy"]["conversation_id_str"] + else: + cid = tweet["conversation_id_str"] + + if cid not in seen: + seen.add(cid) + try: + yield from self.api.tweet_detail(cid) + except Exception: + yield tweet + def metadata(self): """Return general metadata""" return {} @@ -418,12 +440,12 @@ self.user = "id:" + user_id def tweets(self): - tweets = (self.api.user_tweets(self.user) if self.retweets else - self.api.user_media(self.user)) + tweets = (self.api.user_tweets if self.retweets else + self.api.user_media) # yield initial batch of (media) tweets tweet = None - for tweet in tweets: + for tweet in tweets(self.user): yield tweet if tweet is None: @@ -442,12 +464,17 @@ if "legacy" in tweet: tweet = tweet["legacy"] + # build search query + query = "from:{} max_id:{}".format(username, tweet["id_str"]) + if self.retweets: + query += " include:retweets include:nativeretweets" + if not self.textonly: + query += (" (filter:images OR" + " filter:native_video OR" + " card_name:animated_gif)") + # yield search results starting from last tweet id - yield from self.api.search_adaptive( - "from:{} include:retweets include:nativeretweets max_id:{} " - "filter:images OR card_name:animated_gif OR filter:native_video" - .format(username, tweet["id_str"]) - ) + yield from self.api.search_adaptive(query) class TwitterTweetsExtractor(TwitterExtractor): @@ 
-694,10 +721,10 @@ "date" : "dt:2020-08-20 04:00:28", }, }), - # all Tweets from a conversation (#1319) - ("https://twitter.com/BlankArts_/status/1323314488611872769", { + # all Tweets from a 'conversation' (#1319) + ("https://twitter.com/supernaturepics/status/604341487988576256", { "options": (("conversations", True),), - "count": ">= 50", + "count": 5, }), # retweet with missing media entities (#1555) ("https://twitter.com/morino_ya/status/1392763691599237121", { @@ -845,8 +872,11 @@ cookies = extractor.session.cookies cookiedomain = extractor.cookiedomain - # CSRF - csrf_token = cookies.get("ct0", domain=cookiedomain) + csrf = extractor.config("csrf") + if csrf is None or csrf == "cookies": + csrf_token = cookies.get("ct0", domain=cookiedomain) + else: + csrf_token = None if not csrf_token: csrf_token = util.generate_token() cookies.set("ct0", csrf_token, domain=cookiedomain) @@ -1000,19 +1030,23 @@ def _user_id_by_screen_name(self, screen_name): if screen_name.startswith("id:"): self._user = util.SENTINEL - return screen_name[3:] + user_id = screen_name[3:] - user = () - try: - user = self._user = self.user_by_screen_name(screen_name) - return user["rest_id"] - except KeyError: - if "unavailable_message" in user: - raise exception.NotFoundError("{} ({})".format( - user["unavailable_message"].get("text"), - user.get("reason")), False) - else: - raise exception.NotFoundError("user") + else: + user = () + try: + user = self._user = self.user_by_screen_name(screen_name) + user_id = user["rest_id"] + except KeyError: + if "unavailable_message" in user: + raise exception.NotFoundError("{} ({})".format( + user["unavailable_message"].get("text"), + user.get("reason")), False) + else: + raise exception.NotFoundError("user") + + self.extractor._user_id = user_id + return user_id @cache(maxage=3600) def _guest_token(self): @@ -1228,6 +1262,8 @@ tweets.append(entry) elif esw("cursor-bottom-"): cursor = entry["content"] + if "itemContent" in cursor: + cursor = cursor["itemContent"] if not cursor.get("stopOnEmptyResponse", True): # keep going even if there are no tweets tweet = True diff -Nru gallery-dl-1.22.1/gallery_dl/extractor/unsplash.py gallery-dl-1.22.3/gallery_dl/extractor/unsplash.py --- gallery-dl-1.22.1/gallery_dl/extractor/unsplash.py 2022-05-30 10:58:03.000000000 +0000 +++ gallery-dl-1.22.3/gallery_dl/extractor/unsplash.py 2022-06-13 19:23:04.000000000 +0000 @@ -30,12 +30,16 @@ def items(self): fmt = self.config("format") or "raw" + metadata = self.metadata() + for photo in self.photos(): util.delete_items( photo, ("current_user_collections", "related_collections")) url = photo["urls"][fmt] text.nameext_from_url(url, photo) + if metadata: + photo.update(metadata) photo["extension"] = "jpg" photo["date"] = text.parse_datetime(photo["created_at"]) if "tags" in photo: @@ -44,6 +48,10 @@ yield Message.Directory, photo yield Message.Url, url, photo + @staticmethod + def metadata(): + return None + def skip(self, num): pages = num // self.per_page self.page_start += pages @@ -172,17 +180,27 @@ class UnsplashCollectionExtractor(UnsplashExtractor): """Extractor for an unsplash collection""" subcategory = "collection" - pattern = BASE_PATTERN + r"/collections/([^/?#]+)" + pattern = BASE_PATTERN + r"/collections/([^/?#]+)(?:/([^/?#]+))?" 
test = ( ("https://unsplash.com/collections/3178572/winter", { "pattern": r"https://images\.unsplash\.com/(photo-\d+-\w+" r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-1\.2\.1$", + "keyword": {"collection_id": "3178572", + "collection_title": "winter"}, "range": "1-30", "count": 30, }), + ("https://unsplash.com/collections/3178572/"), ("https://unsplash.com/collections/_8qJQ2bCMWE/2021.05"), ) + def __init__(self, match): + UnsplashExtractor.__init__(self, match) + self.title = match.group(2) or "" + + def metadata(self): + return {"collection_id": self.item, "collection_title": self.title} + def photos(self): url = "{}/napi/collections/{}/photos".format(self.root, self.item) params = {"order_by": "latest"} diff -Nru gallery-dl-1.22.1/gallery_dl/extractor/vk.py gallery-dl-1.22.3/gallery_dl/extractor/vk.py --- gallery-dl-1.22.1/gallery_dl/extractor/vk.py 2022-05-30 10:58:03.000000000 +0000 +++ gallery-dl-1.22.3/gallery_dl/extractor/vk.py 2022-06-27 08:24:06.000000000 +0000 @@ -40,12 +40,12 @@ continue try: - photo["url"], photo["width"], photo["height"] = photo[size] + _, photo["width"], photo["height"] = photo[size] except ValueError: # photo without width/height entries (#2535) - photo["url"] = photo[size + "src"] photo["width"] = photo["height"] = 0 + photo["url"] = photo[size + "src"] photo["id"] = photo["id"].rpartition("_")[2] photo.update(data) diff -Nru gallery-dl-1.22.1/gallery_dl/extractor/weibo.py gallery-dl-1.22.3/gallery_dl/extractor/weibo.py --- gallery-dl-1.22.1/gallery_dl/extractor/weibo.py 2022-06-03 15:37:02.000000000 +0000 +++ gallery-dl-1.22.3/gallery_dl/extractor/weibo.py 2022-06-12 15:50:44.000000000 +0000 @@ -52,10 +52,6 @@ for status in self.statuses(): - status["date"] = text.parse_datetime( - status["created_at"], "%a %b %d %H:%M:%S %z %Y") - yield Message.Directory, status - if self.retweets and "retweeted_status" in status: if original_retweets: status = status["retweeted_status"] @@ -68,6 +64,10 @@ else: files = self._files_from_status(status) + status["date"] = text.parse_datetime( + status["created_at"], "%a %b %d %H:%M:%S %z %Y") + yield Message.Directory, status + for num, file in enumerate(files, 1): if file["url"].startswith("http:"): file["url"] = "https:" + file["url"][5:] @@ -191,7 +191,9 @@ subcategory = "user" pattern = USER_PATTERN + r"(?:$|#)" test = ( - ("https://weibo.com/1758989602"), + ("https://weibo.com/1758989602", { + "pattern": r"^https://weibo\.com/u/1758989602\?tabtype=feed$", + }), ("https://weibo.com/u/1758989602"), ("https://weibo.com/p/1758989602"), ("https://m.weibo.cn/profile/2314621010"), @@ -200,12 +202,13 @@ ) def items(self): - base = " {}/u/{}?tabtype=".format(self.root, self._user_id()) + base = "{}/u/{}?tabtype=".format(self.root, self._user_id()) return self._dispatch_extractors(( - (WeiboHomeExtractor , base + "home"), - (WeiboFeedExtractor , base + "feed"), - (WeiboVideosExtractor, base + "newVideo"), - (WeiboAlbumExtractor , base + "album"), + (WeiboHomeExtractor , base + "home"), + (WeiboFeedExtractor , base + "feed"), + (WeiboVideosExtractor , base + "video"), + (WeiboNewvideoExtractor, base + "newVideo"), + (WeiboAlbumExtractor , base + "album"), ), ("feed",)) @@ -254,8 +257,27 @@ class WeiboVideosExtractor(WeiboExtractor): - """Extractor for weibo 'newVideo' listings""" + """Extractor for weibo 'video' listings""" subcategory = "videos" + pattern = USER_PATTERN + r"\?tabtype=video" + test = ("https://weibo.com/1758989602?tabtype=video", { + "pattern": r"https://f\.(video\.weibocdn\.com|us\.sinaimg\.cn)" + 
r"/(../)?\w+\.mp4\?label=mp", + "range": "1-30", + "count": 30, + }) + + def statuses(self): + endpoint = "/profile/getprofilevideolist" + params = {"uid": self._user_id()} + + for status in self._pagination(endpoint, params): + yield status["video_detail_vo"] + + +class WeiboNewvideoExtractor(WeiboExtractor): + """Extractor for weibo 'newVideo' listings""" + subcategory = "newvideo" pattern = USER_PATTERN + r"\?tabtype=newVideo" test = ("https://weibo.com/1758989602?tabtype=newVideo", { "pattern": r"https://f\.video\.weibocdn\.com/(../)?\w+\.mp4\?label=mp", @@ -336,8 +358,8 @@ }), # type == gif ("https://weibo.com/1758989602/LvBhm5DiP", { - "pattern": r"http://g\.us\.sinaimg.cn/o0/qNZcaAAglx07Wuf921CM01041" - r"20005tc0E010\.mp4\?label=gif_mp4", + "pattern": r"https://g\.us\.sinaimg.cn/o0/qNZcaAAglx07Wuf921CM0104" + r"120005tc0E010\.mp4\?label=gif_mp4", }), ("https://m.weibo.cn/status/4339748116375525"), ("https://m.weibo.cn/5746766133/4339748116375525"), diff -Nru gallery-dl-1.22.1/gallery_dl/formatter.py gallery-dl-1.22.3/gallery_dl/formatter.py --- gallery-dl-1.22.1/gallery_dl/formatter.py 2022-05-30 10:58:03.000000000 +0000 +++ gallery-dl-1.22.3/gallery_dl/formatter.py 2022-06-25 14:52:58.000000000 +0000 @@ -14,6 +14,7 @@ import _string import datetime import operator +import functools from . import text, util _CACHE = {} @@ -231,12 +232,7 @@ func = operator.itemgetter try: if ":" in key: - start, _, stop = key.partition(":") - stop, _, step = stop.partition(":") - start = int(start) if start else None - stop = int(stop) if stop else None - step = int(step) if step else None - key = slice(start, stop, step) + key = _slice(key) except TypeError: pass # key is an integer @@ -245,6 +241,16 @@ return first, funcs +def _slice(indices): + start, _, stop = indices.partition(":") + stop, _, step = stop.partition(":") + return slice( + int(start) if start else None, + int(stop) if stop else None, + int(step) if step else None, + ) + + def parse_format_spec(format_spec, conversion): fmt = build_format_func(format_spec) if not conversion: @@ -257,7 +263,7 @@ "u": str.upper, "c": str.capitalize, "C": string.capwords, - "j": json.dumps, + "j": functools.partial(json.dumps, default=str), "t": str.strip, "T": util.datetime_to_timestamp_string, "d": text.parse_timestamp, @@ -282,6 +288,8 @@ fmt = format_spec[0] if fmt == "?": return _parse_optional(format_spec) + if fmt == "[": + return _parse_slice(format_spec) if fmt == "L": return _parse_maxlen(format_spec) if fmt == "J": @@ -304,6 +312,16 @@ return optional +def _parse_slice(format_spec): + indices, _, format_spec = format_spec.partition("]") + slice = _slice(indices[1:]) + fmt = build_format_func(format_spec) + + def apply_slice(obj): + return fmt(obj[slice]) + return apply_slice + + def _parse_maxlen(format_spec): maxlen, replacement, format_spec = format_spec.split("/", 2) maxlen = text.parse_int(maxlen[1:]) diff -Nru gallery-dl-1.22.1/gallery_dl/version.py gallery-dl-1.22.3/gallery_dl/version.py --- gallery-dl-1.22.1/gallery_dl/version.py 2022-06-04 17:41:17.000000000 +0000 +++ gallery-dl-1.22.3/gallery_dl/version.py 2022-06-28 20:42:25.000000000 +0000 @@ -6,4 +6,4 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. 
-__version__ = "1.22.1" +__version__ = "1.22.3" diff -Nru gallery-dl-1.22.1/gallery_dl.egg-info/PKG-INFO gallery-dl-1.22.3/gallery_dl.egg-info/PKG-INFO --- gallery-dl-1.22.1/gallery_dl.egg-info/PKG-INFO 2022-06-04 17:41:19.000000000 +0000 +++ gallery-dl-1.22.3/gallery_dl.egg-info/PKG-INFO 2022-06-28 20:42:27.000000000 +0000 @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery-dl -Version: 1.22.1 +Version: 1.22.3 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Download-URL: https://github.com/mikf/gallery-dl/releases/latest @@ -98,8 +98,8 @@ Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.22.1/gallery-dl.exe>`__ -- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.22.1/gallery-dl.bin>`__ +- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.22.3/gallery-dl.exe>`__ +- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.22.3/gallery-dl.bin>`__ | Executables build from the latest commit can be found at | https://github.com/mikf/gallery-dl/actions/workflows/executables.yml diff -Nru gallery-dl-1.22.1/gallery_dl.egg-info/SOURCES.txt gallery-dl-1.22.3/gallery_dl.egg-info/SOURCES.txt --- gallery-dl-1.22.1/gallery_dl.egg-info/SOURCES.txt 2022-06-04 17:41:19.000000000 +0000 +++ gallery-dl-1.22.3/gallery_dl.egg-info/SOURCES.txt 2022-06-28 20:42:27.000000000 +0000 @@ -102,6 +102,7 @@ gallery_dl/extractor/inkbunny.py gallery_dl/extractor/instagram.py gallery_dl/extractor/issuu.py +gallery_dl/extractor/itaku.py gallery_dl/extractor/kabeuchi.py gallery_dl/extractor/keenspot.py gallery_dl/extractor/kemonoparty.py @@ -149,6 +150,7 @@ gallery_dl/extractor/pixiv.py gallery_dl/extractor/pixnet.py gallery_dl/extractor/plurk.py +gallery_dl/extractor/poipiku.py gallery_dl/extractor/pornhub.py gallery_dl/extractor/pururin.py gallery_dl/extractor/reactor.py diff -Nru gallery-dl-1.22.1/PKG-INFO gallery-dl-1.22.3/PKG-INFO --- gallery-dl-1.22.1/PKG-INFO 2022-06-04 17:41:19.629032100 +0000 +++ gallery-dl-1.22.3/PKG-INFO 2022-06-28 20:42:28.142686400 +0000 @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery_dl -Version: 1.22.1 +Version: 1.22.3 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Download-URL: https://github.com/mikf/gallery-dl/releases/latest @@ -98,8 +98,8 @@ Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.22.1/gallery-dl.exe>`__ -- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.22.1/gallery-dl.bin>`__ +- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.22.3/gallery-dl.exe>`__ +- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.22.3/gallery-dl.bin>`__ | Executables build from the latest commit can be found at | https://github.com/mikf/gallery-dl/actions/workflows/executables.yml diff -Nru gallery-dl-1.22.1/README.rst gallery-dl-1.22.3/README.rst --- gallery-dl-1.22.1/README.rst 2022-06-04 17:41:17.000000000 +0000 +++ gallery-dl-1.22.3/README.rst 2022-06-28 20:42:25.000000000 +0000 @@ -65,8 +65,8 @@ Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows 
diff -Nru gallery-dl-1.22.1/test/test_cookies.py gallery-dl-1.22.3/test/test_cookies.py
--- gallery-dl-1.22.1/test/test_cookies.py 2022-05-30 10:58:03.000000000 +0000
+++ gallery-dl-1.22.3/test/test_cookies.py 2022-06-25 14:51:34.000000000 +0000
@@ -89,7 +89,7 @@
         self.assertEqual(sorted(cookies.values()), sorted(self.cdict.values()))
 
     def test_domain(self):
-        for category in ["exhentai", "idolcomplex", "nijie"]:
+        for category in ["exhentai", "idolcomplex", "nijie", "horne"]:
             extr = _get_extractor(category)
             cookies = extr.session.cookies
             for key in self.cdict:
@@ -107,7 +107,8 @@
         extr_cookies = {
             "exhentai"   : ("ipb_member_id", "ipb_pass_hash"),
             "idolcomplex": ("login", "pass_hash"),
-            "nijie"      : ("nemail", "nlogin"),
+            "nijie"      : ("nijie_tok",),
+            "horne"      : ("horne_tok",),
         }
         for category, cookienames in extr_cookies.items():
             cookies = {name: "value" for name in cookienames}
@@ -199,10 +200,13 @@
 
 
 def _get_extractor(category):
-    for extr in extractor.extractors():
-        if extr.category == category and hasattr(extr, "_login_impl"):
-            url = next(extr._get_tests())[0]
-            return extr.from_url(url)
+    URLS = {
+        "exhentai"   : "https://exhentai.org/g/1200119/d55c44d3d0/",
+        "idolcomplex": "https://idol.sankakucomplex.com/post/show/1",
+        "nijie"      : "https://nijie.info/view.php?id=1",
+        "horne"      : "https://horne.red/view.php?id=1",
+    }
+    return extractor.find(URLS[category])
 
 
 if __name__ == "__main__":
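
The rewritten _get_extractor() helper above relies on extractor.find(), which
maps a URL to an instance of the first extractor whose pattern matches it. A
rough sketch of the same call outside the test suite; the URL and the printed
attribute values are illustrative:

    from gallery_dl import extractor

    # find() returns an extractor instance, or None if no extractor matches
    extr = extractor.find("https://nijie.info/view.php?id=1")
    if extr is not None:
        print(extr.category, extr.subcategory)  # e.g. nijie image
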
"-b-") + # optional-and-maxlen self._run_test("{d[a]:?</>/L1/too long/}", "<too long>") self._run_test("{d[c]:?</>/L5/too long/}", "") diff -Nru gallery-dl-1.22.1/test/test_results.py gallery-dl-1.22.3/test/test_results.py --- gallery-dl-1.22.1/test/test_results.py 2022-05-17 12:36:12.000000000 +0000 +++ gallery-dl-1.22.3/test/test_results.py 2022-06-25 14:51:34.000000000 +0000 @@ -312,6 +312,7 @@ config.set(("extractor", "nijie") , "username", email) config.set(("extractor", "seiga") , "username", email) + config.set(("extractor", "horne") , "username", email2) config.set(("extractor", "pinterest") , "username", email2) config.set(("extractor", "pinterest") , "username", None) # login broken