[sexcom] skip unavailable pins (#325)

This commit is contained in:
Mike Fährmann
2019-07-02 22:05:54 +02:00
parent 8966930c5c
commit 69997e92db

View File

@@ -23,9 +23,9 @@ class SexcomExtractor(Extractor):
def items(self): def items(self):
yield Message.Version, 1 yield Message.Version, 1
yield Message.Directory, self.metadata() yield Message.Directory, self.metadata()
for url in self.pins(): for pin in map(self._parse_pin, self.pins()):
pin = self._parse_pin(url) if pin:
yield Message.Url, pin["url"], pin yield Message.Url, pin["url"], pin
def metadata(self): def metadata(self):
return {} return {}
@@ -49,8 +49,13 @@ class SexcomExtractor(Extractor):
return return
url = text.urljoin(self.root, url) url = text.urljoin(self.root, url)
def _parse_pin(self, pin_url): def _parse_pin(self, url, expect=range(400, 429)):
extr = text.extract_from(self.request(pin_url).text) response = self.request(url, expect=expect)
if response.status_code >= 400:
self.log.warning("Unable to fetch %s (%s: %s)",
url, response.status_code, response.reason)
return None
extr = text.extract_from(response.text)
data = {} data = {}
data["thumbnail"] = extr('itemprop="thumbnail" content="', '"') data["thumbnail"] = extr('itemprop="thumbnail" content="', '"')
@@ -124,6 +129,10 @@ class SexcomPinExtractor(SexcomExtractor):
("https://www.sex.com/pin/55847384-very-nicely-animated/", { ("https://www.sex.com/pin/55847384-very-nicely-animated/", {
"pattern": "ytdl:https://www.pornhub.com/embed/ph56ef24b6750f2", "pattern": "ytdl:https://www.pornhub.com/embed/ph56ef24b6750f2",
}), }),
# 404
("https://www.sex.com/pin/55847385/", {
"count": 0,
}),
) )
def __init__(self, match): def __init__(self, match):