[sexcom] fix 'gif' pin extraction (#7239)

with much less metadata
This commit is contained in:
Mike Fährmann
2025-03-27 19:54:17 +01:00
parent fc868b02f6
commit 12327b076e
2 changed files with 45 additions and 8 deletions

View File

@@ -55,10 +55,16 @@ class SexcomExtractor(Extractor):
self.log.warning('Unable to fetch %s ("%s %s")',
url, response.status_code, response.reason)
return None
if "/pin/" in response.url:
return self._parse_pin_legacy(response)
return self._parse_pin_new(response)
def _parse_pin_legacy(self, response):
extr = text.extract_from(response.text)
data = {}
data["_http_headers"] = {"Referer": url}
data["_http_headers"] = {"Referer": response.url}
data["thumbnail"] = extr('itemprop="thumbnail" content="', '"')
data["type"] = extr('<h1>' , '<').rstrip(" -").strip().lower()
data["title"] = text.unescape(extr('itemprop="name">' , '<'))
@@ -84,7 +90,8 @@ class SexcomExtractor(Extractor):
src = (text.extr(iframe, ' src="', '"') or
text.extr(iframe, " src='", "'"))
if not src:
self.log.warning("Unable to fetch media from %s", url)
self.log.warning(
"Unable to fetch media from %s", response.url)
return None
data["extension"] = None
data["url"] = "ytdl:" + src
@@ -102,6 +109,20 @@ class SexcomExtractor(Extractor):
return data
def _parse_pin_new(self, response):
    """Collect metadata for a pin from a non-legacy ("new") pin page.

    Only a reduced set of fields is gathered here: request headers,
    media URL, title, pin ID and tags. The type is always reported
    as "gif".

    response -- HTTP response object; its 'text' is scanned for the
                fields and its 'url' is used as the Referer header.

    Returns whatever text.nameext_from_url() returns for the media URL
    and the collected dict (presumably the same dict with filename and
    extension filled in -- verify against the 'text' module).
    """
    # NOTE(review): extract_from() presumably returns a stateful scanner
    # that continues after the previous match, so the order of the
    # calls below must stay exactly as it is -- confirm in 'text'.
    scan = text.extract_from(response.text)

    headers = {"Referer": response.url}
    media_url = scan(' href="', '"')
    title = text.unescape(scan("<title>", " Gif | Sex.com<"))

    # the pin ID is the last path segment of the canonical URL
    canonical = scan('rel="canonical" href="', '"')
    pin_id = text.parse_int(canonical.rpartition("/")[2])

    tags = text.split_html(scan("</h1>", "</section>"))

    pin = {
        "_http_headers": headers,
        "type": "gif",
        "url": media_url,
        "title": title,
        "pin_id": pin_id,
        "tags": tags,
    }
    return text.nameext_from_url(media_url, pin)
class SexcomPinExtractor(SexcomExtractor):
"""Extractor for a pinned image or video on www.sex.com"""

View File

@@ -16,19 +16,24 @@ __tests__ = (
"#urls" : "https://imagex1.sx.cdn.live/images/pinporn/2014/08/26/7637609.jpg",
"#sha1_content": "8cd419c6790ef7348bd398c364ab10f956e438dc",
"comments" : int,
"comments" : range(0, 5),
"date" : "dt:2014-10-19 15:45:44",
"extension": "jpg",
"filename" : "7637609",
"likes" : int,
"likes" : range(240, 275),
"pin_id" : 21241874,
"repins" : int,
"tags" : list,
"thumbnail": str,
"repins" : range(90, 120),
"thumbnail": "https://imagex1.sx.cdn.live/images/pinporn/2014/08/26/7637609.jpg?width=300",
"title" : "Sexy Ecchi Girls 166",
"type" : "picture",
"uploader" : "mangazeta",
"url" : str,
"url" : "https://imagex1.sx.cdn.live/images/pinporn/2014/08/26/7637609.jpg",
"tags": [
"ecchi",
"ecchi-girls",
"Hot",
"sexy-ecchi",
],
},
{
@@ -38,6 +43,17 @@ __tests__ = (
"#class" : sexcom.SexcomPinExtractor,
"#urls" : "https://imagex1.sx.cdn.live/images/pinporn/2017/12/07/18760842.gif",
"#sha1_content": "176cc63fa05182cb0438c648230c0f324a5965fe",
"extension": "gif",
"filename" : "18760842",
"pin_id" : 209061,
"title" : "Ecchi",
"type" : "gif",
"url" : "https://imagex1.sx.cdn.live/images/pinporn/2017/12/07/18760842.gif",
"tags" : [
"#Big Tits",
"#Hentai",
],
},
{