From a14b72be21c216491a4e07b691652d45b5319935 Mon Sep 17 00:00:00 2001 From: Kyle Anthony Williams Date: Thu, 11 Nov 2021 14:03:34 -0500 Subject: [PATCH] [webtoons] Use swebtoon-phinf.pstatic.net instead of webtoon-phinf.pstatic.net (#2005) * [webtoons] Use swebtoon-phinf.pstatic.net instead of webtoon-phinf.pstatic.net This trick to avoid having to set a Referer header comes from Webtoon's RSS feeds. The two URLs below serve the same content: https://webtoon-phinf.pstatic.net/20210929_153/1632867980912DmcGK_JPEG/16328679808882705182.jpg?type=q90 https://swebtoon-phinf.pstatic.net/20210929_153/1632867980912DmcGK_JPEG/16328679808882705182.jpg?type=q90 The URL on the domain "webtoon-phinf.pstatic.net" needs a Referer header, whereas the URL on the domain "swebtoon-phinf.pstatic.net" does not. This is because "swebtoon" images are meant for environments without explicit network control, such as RSS feeds displayed on sites like Feedly. This change should make it easier for gallery-dl developers to embed Webtoon comics without worrying about headers. 
--- gallery_dl/extractor/webtoons.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/gallery_dl/extractor/webtoons.py b/gallery_dl/extractor/webtoons.py index e2474c9c..cf5b192c 100644 --- a/gallery_dl/extractor/webtoons.py +++ b/gallery_dl/extractor/webtoons.py @@ -48,7 +48,7 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor): test = ( (("https://www.webtoons.com/en/comedy/safely-endangered" "/ep-572-earth/viewer?title_no=352&episode_no=572"), { - "url": "11041d71a3f92728305c11a228e77cf0f7aa02ef", + "url": "55bec5d7c42aba19e3d0d56db25fdf0b0b13be38", "content": ("1748c7e82b6db910fa179f6dc7c4281b0f680fa7", "42055e44659f6ffc410b3fb6557346dfbb993df3", "49e1f2def04c6f7a6a3dacf245a1cd9abe77a6a9"), @@ -62,7 +62,6 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor): url = "{}/{}/viewer?{}".format(self.root, self.path, query) GalleryExtractor.__init__(self, match, url) self.setup_agegate_cookies() - self.session.headers["Referer"] = url query = text.parse_query(query) self.title_no = query.get("title_no") @@ -88,7 +87,7 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor): @staticmethod def images(page): return [ - (url, None) + (url.replace("://webtoon-phinf.", "://swebtoon-phinf."), None) for url in text.extract_iter( page, 'class="_images" data-url="', '"') ]