merge #5126: [naver] fix EUC-KR encoding issue in old image URLs

This commit is contained in:
Mike Fährmann
2024-03-06 00:22:33 +01:00
2 changed files with 34 additions and 4 deletions

View File

@@ -63,10 +63,13 @@ class NaverPostExtractor(NaverBase, GalleryExtractor):
return data
def images(self, page):
    """Collect the image URLs found in 'page'.

    Every 'data-lazy-src="..."' attribute value is taken as an image
    URL. The first "://post" is rewritten to "://blog" and everything
    from the first "?" onwards is dropped.

    Returns a list of (url, None) tuples in document order.
    """
    results = []
    for url in text.extract_iter(page, 'data-lazy-src="', '"'):
        url = url.replace("://post", "://blog", 1).partition("?")[0]
        # U+FFFD (the Unicode replacement character) in the default
        # unquoted form indicates percent-escapes that did not decode
        # cleanly -- presumably because old image URLs are EUC-KR
        # encoded rather than UTF-8. Such URLs are replaced by their
        # EUC-KR-decoded form; all other URLs stay percent-encoded.
        if "\ufffd" in text.unquote(url):
            url = text.unquote(url, encoding="EUC-KR")
        results.append((url, None))
    return results
class NaverBlogExtractor(NaverBase, Extractor):