From 8a42ea736ad86c294e945f2eb92dd773f8d7bb00 Mon Sep 17 00:00:00 2001 From: blankie Date: Sat, 23 Dec 2023 13:28:36 +1100 Subject: [PATCH] [postmill] implement suggestions --- docs/configuration.rst | 2 +- gallery_dl/extractor/postmill.py | 41 ++++++++++++++++---------------- 2 files changed, 21 insertions(+), 22 deletions(-) diff --git a/docs/configuration.rst b/docs/configuration.rst index c49dc2c9..fb4b93c6 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -2735,7 +2735,7 @@ Description extractor.[postmill].save-link-post-body ------------------------- +---------------------------------------- Type ``bool`` Default diff --git a/gallery_dl/extractor/postmill.py b/gallery_dl/extractor/postmill.py index 4d4b38a2..29b351ba 100644 --- a/gallery_dl/extractor/postmill.py +++ b/gallery_dl/extractor/postmill.py @@ -7,7 +7,6 @@ """Extractors for Postmill instances""" import re -import urllib.parse from .common import BaseExtractor, Message from .. import text, exception @@ -28,8 +27,8 @@ class PostmillExtractor(BaseExtractor): def items(self): for post_url in self.post_urls(): - response = self.request(post_url) - extr = text.extract_from(response.text) + page = self.request(post_url).text + extr = text.extract_from(page) title = text.unescape(extr( '')) @@ -52,7 +51,7 @@ class PostmillExtractor(BaseExtractor): id = int(match.group(2)) is_text_post = url.startswith("/") - is_image_post = self._search_image_tag(response.text) is not None + is_image_post = self._search_image_tag(page) is not None data = { "title": title, "date": date, @@ -60,7 +59,7 @@ class PostmillExtractor(BaseExtractor): "forum": forum, "id": id, "flair": [text.unescape(i) for i in text.extract_iter( - response.text, '', '')], + page, '', '')], "instance": self.instance, } @@ -90,32 +89,32 @@ class PostmillSubmissionsExtractor(PostmillExtractor): def __init__(self, match): PostmillExtractor.__init__(self, match) - self.base = match.group(3) - self.sorting_path = match.group(4) or "" + groups = match.groups() + self.base = groups[-3] + self.sorting_path = groups[-2] or "" self.query = {key: value for key, value in text.parse_query( - match.group(5) or "").items() if self.acceptable_query(key)} + groups[-1]).items() if self.acceptable_query(key)} def items(self): url = self.root + self.base + self.sorting_path - if self.query: - url += "?" + urllib.parse.urlencode(self.query) while url: - response = self.request(url) + response = self.request(url, params=self.query) if response.history: redirect_url = response.url if redirect_url == self.root + "/login": raise exception.StopExtraction( "HTTP redirect to login page (%s)", redirect_url) + page = response.text - for nav in text.extract_iter(response.text, + for nav in text.extract_iter(page, ''): post_url = text.unescape(text.extr(nav, '