[archiveofsins] add thread extractor

This commit is contained in:
Mike Fährmann
2017-07-15 13:23:04 +02:00
parent 96e13604da
commit c93f7d7496
4 changed files with 26 additions and 5 deletions

View File

@@ -17,6 +17,7 @@ modules = [
"4plebs", "4plebs",
"8chan", "8chan",
"archivedmoe", "archivedmoe",
"archiveofsins",
"batoto", "batoto",
"danbooru", "danbooru",
"desuarchive", "desuarchive",

View File

@@ -25,8 +25,3 @@ class ArchivedmoeThreadExtractor(chan.FoolfuukaThreadExtractor):
"url": "ffec05a1a1b906b5ca85992513671c9155ee9e87", "url": "ffec05a1a1b906b5ca85992513671c9155ee9e87",
}), }),
] ]
def __init__(self, match):
    """Initialize the extractor and set custom request headers.

    Delegates to chan.FoolfuukaThreadExtractor.__init__ with the given
    regex match, then overrides two headers on the HTTP session.
    """
    chan.FoolfuukaThreadExtractor.__init__(self, match)
    request_headers = self.session.headers
    # NOTE(review): presumably the site rejects requests without a
    # browser-like User-Agent / matching Referer -- confirm.
    request_headers["User-Agent"] = "Mozilla 5.0"
    request_headers["Referer"] = self.root

View File

@@ -0,0 +1,23 @@
# -*- coding: utf-8 -*-
# Copyright 2017 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extract images from https://archiveofsins.com/"""
from . import chan
class ArchiveofsinsThreadExtractor(chan.FoolfuukaThreadExtractor):
    """Thread extractor for image threads hosted on archiveofsins.com.

    This class only declares site-specific data (category name, root URL,
    URL pattern and a test case); it defines no methods of its own, so all
    behavior comes from chan.FoolfuukaThreadExtractor.
    """

    category = "archiveofsins"
    root = "https://archiveofsins.com"

    # Optional scheme and "www." prefix, followed by two capture groups:
    # a path segment before "/thread/" and the numeric thread id after it.
    pattern = [
        r"(?:https?://)?(?:www\.)?archiveofsins\.com"
        r"/([^/]+)/thread/(\d+)",
    ]

    # Sample thread URL paired with its expected "url" and "content" values
    # (presumably hashes checked by the project's test suite -- confirm).
    test = [
        (
            "https://www.archiveofsins.com/h/thread/4668813/",
            {
                "url": "f612d287087e10a228ef69517cf811539db9a102",
                "content": "0dd92d0d8a7bf6e2f7d1f5ac8954c1bcf18c22a4",
            },
        ),
    ]

View File

@@ -73,6 +73,8 @@ class FoolfuukaThreadExtractor(Extractor):
def __init__(self, match): def __init__(self, match):
Extractor.__init__(self) Extractor.__init__(self)
self.board, self.thread = match.groups() self.board, self.thread = match.groups()
self.session.headers["User-Agent"] = "Mozilla 5.0"
self.session.headers["Referer"] = self.root
def items(self): def items(self):
op = True op = True