add pre-generated 'pattern' for supported BaseExtractor sites

This commit is contained in:
Mike Fährmann
2022-05-09 22:20:09 +02:00
parent 6ae3a5cdb0
commit d26da3b9e5
9 changed files with 84 additions and 29 deletions

View File

@@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation. # published by the Free Software Foundation.
"""Extractors for 4chan archives based on FoolFuuka""" """Extractors for FoolFuuka 4chan archives"""
from .common import BaseExtractor, Message from .common import BaseExtractor, Message
from .. import text from .. import text
@@ -66,6 +66,7 @@ BASE_PATTERN = FoolfuukaExtractor.update({
}, },
"archivedmoe": { "archivedmoe": {
"root": "https://archived.moe", "root": "https://archived.moe",
"pattern": r"archived\.moe",
}, },
"archiveofsins": { "archiveofsins": {
"root": "https://archiveofsins.com", "root": "https://archiveofsins.com",
@@ -73,12 +74,15 @@ BASE_PATTERN = FoolfuukaExtractor.update({
}, },
"b4k": { "b4k": {
"root": "https://arch.b4k.co", "root": "https://arch.b4k.co",
"pattern": r"arch\.b4k\.co",
}, },
"desuarchive": { "desuarchive": {
"root": "https://desuarchive.org", "root": "https://desuarchive.org",
"pattern": r"desuarchive\.org",
}, },
"fireden": { "fireden": {
"root": "https://boards.fireden.net", "root": "https://boards.fireden.net",
"pattern": r"boards\.fireden\.net",
}, },
"nyafuu": { "nyafuu": {
"root": "https://archive.nyafuu.org", "root": "https://archive.nyafuu.org",
@@ -90,9 +94,11 @@ BASE_PATTERN = FoolfuukaExtractor.update({
}, },
"thebarchive": { "thebarchive": {
"root": "https://thebarchive.com", "root": "https://thebarchive.com",
"pattern": r"thebarchive\.com",
}, },
"wakarimasen": { "wakarimasen": {
"root": "https://archive.wakarimasen.moe", "root": "https://archive.wakarimasen.moe",
"pattern": r"archive\.wakarimasen\.moe",
}, },
}) })

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2016-2021 Mike Fährmann # Copyright 2016-2022 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@@ -41,6 +41,7 @@ class FoolslideExtractor(BaseExtractor):
BASE_PATTERN = FoolslideExtractor.update({ BASE_PATTERN = FoolslideExtractor.update({
"kireicake": { "kireicake": {
"root": "https://reader.kireicake.com", "root": "https://reader.kireicake.com",
"pattern": r"reader\.kireicake\.com",
}, },
"powermanga": { "powermanga": {
"root": "https://read.powermanga.org", "root": "https://read.powermanga.org",

View File

@@ -1,12 +1,12 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2021 Mike Fährmann # Copyright 2021-2022 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation. # published by the Free Software Foundation.
"""Extractors for Gelbooru v0.1 sites""" """Extractors for Gelbooru Beta 0.1.11 sites"""
from . import booru from . import booru
from .. import text from .. import text
@@ -59,12 +59,30 @@ class GelbooruV01Extractor(booru.BooruExtractor):
BASE_PATTERN = GelbooruV01Extractor.update({ BASE_PATTERN = GelbooruV01Extractor.update({
"thecollection" : {"root": "https://the-collection.booru.org"}, "thecollection": {
"illusioncardsbooru": {"root": "https://illusioncards.booru.org"}, "root": "https://the-collection.booru.org",
"allgirlbooru" : {"root": "https://allgirl.booru.org"}, "pattern": r"the-collection\.booru\.org",
"drawfriends" : {"root": "https://drawfriends.booru.org"}, },
"vidyart" : {"root": "https://vidyart.booru.org"}, "illusioncardsbooru": {
"theloudbooru" : {"root": "https://tlb.booru.org"}, "root": "https://illusioncards.booru.org",
"pattern": r"illusioncards\.booru\.org",
},
"allgirlbooru": {
"root": "https://allgirl.booru.org",
"pattern": r"allgirl\.booru\.org",
},
"drawfriends": {
"root": "https://drawfriends.booru.org",
"pattern": r"drawfriends\.booru\.org",
},
"vidyart": {
"root": "https://vidyart.booru.org",
"pattern": r"vidyart\.booru\.org",
},
"theloudbooru": {
"root": "https://tlb.booru.org",
"pattern": r"tlb\.booru\.org",
},
}) })

View File

@@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation. # published by the Free Software Foundation.
"""Extractors for Gelbooru v0.2 sites""" """Extractors for Gelbooru Beta 0.2 sites"""
from . import booru from . import booru
from .. import text, util, exception from .. import text, util, exception
@@ -116,11 +116,23 @@ class GelbooruV02Extractor(booru.BooruExtractor):
INSTANCES = { INSTANCES = {
"realbooru": {"root": "https://realbooru.com"}, "realbooru": {
"rule34" : {"root": "https://rule34.xxx", "root": "https://realbooru.com",
"api_root": " https://api.rule34.xxx"}, "pattern": r"realbooru\.com",
"safebooru": {"root": "https://safebooru.org"}, },
"tbib" : {"root": "https://tbib.org"}, "rule34": {
"root": "https://rule34.xxx",
"pattern": r"rule34\.xxx",
"api_root": "https://api.rule34.xxx",
},
"safebooru": {
"root": "https://safebooru.org",
"pattern": r"safebooru\.org",
},
"tbib": {
"root": "https://tbib.org",
"pattern": r"tbib\.org",
},
} }
BASE_PATTERN = GelbooruV02Extractor.update(INSTANCES) BASE_PATTERN = GelbooruV02Extractor.update(INSTANCES)
@@ -158,7 +170,7 @@ class GelbooruV02TagExtractor(GelbooruV02Extractor):
return {"search_tags": self.tags} return {"search_tags": self.tags}
def posts(self): def posts(self):
return self._pagination({"tags" : self.tags}) return self._pagination({"tags": self.tags})
class GelbooruV02PoolExtractor(GelbooruV02Extractor): class GelbooruV02PoolExtractor(GelbooruV02Extractor):

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2019-2021 Mike Fährmann # Copyright 2019-2022 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@@ -60,12 +60,14 @@ class MastodonExtractor(BaseExtractor):
INSTANCES = { INSTANCES = {
"mastodon.social": { "mastodon.social": {
"root" : "https://mastodon.social", "root" : "https://mastodon.social",
"pattern" : r"mastodon\.social",
"access-token" : "Y06R36SMvuXXN5_wiPKFAEFiQaMSQg0o_hGgc86Jj48", "access-token" : "Y06R36SMvuXXN5_wiPKFAEFiQaMSQg0o_hGgc86Jj48",
"client-id" : "dBSHdpsnOUZgxOnjKSQrWEPakO3ctM7HmsyoOd4FcRo", "client-id" : "dBSHdpsnOUZgxOnjKSQrWEPakO3ctM7HmsyoOd4FcRo",
"client-secret": "DdrODTHs_XoeOsNVXnILTMabtdpWrWOAtrmw91wU1zI", "client-secret": "DdrODTHs_XoeOsNVXnILTMabtdpWrWOAtrmw91wU1zI",
}, },
"pawoo": { "pawoo": {
"root" : "https://pawoo.net", "root" : "https://pawoo.net",
"pattern" : r"pawoo\.net",
"access-token" : "c12c9d275050bce0dc92169a28db09d7" "access-token" : "c12c9d275050bce0dc92169a28db09d7"
"0d62d0a75a8525953098c167eacd3668", "0d62d0a75a8525953098c167eacd3668",
"client-id" : "978a25f843ec01e53d09be2c290cd75c" "client-id" : "978a25f843ec01e53d09be2c290cd75c"
@@ -75,6 +77,7 @@ INSTANCES = {
}, },
"baraag": { "baraag": {
"root" : "https://baraag.net", "root" : "https://baraag.net",
"pattern" : r"baraag\.net",
"access-token" : "53P1Mdigf4EJMH-RmeFOOSM9gdSDztmrAYFgabOKKE0", "access-token" : "53P1Mdigf4EJMH-RmeFOOSM9gdSDztmrAYFgabOKKE0",
"client-id" : "czxx2qilLElYHQ_sm-lO8yXuGwOHxLX9RYYaD0-nq1o", "client-id" : "czxx2qilLElYHQ_sm-lO8yXuGwOHxLX9RYYaD0-nq1o",
"client-secret": "haMaFdMBgK_-BIxufakmI2gFgkYjqmgXGEO2tB-R2xY", "client-secret": "haMaFdMBgK_-BIxufakmI2gFgkYjqmgXGEO2tB-R2xY",

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2020-2021 Mike Fährmann # Copyright 2020-2022 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@@ -54,6 +54,7 @@ class MoebooruExtractor(BooruExtractor):
BASE_PATTERN = MoebooruExtractor.update({ BASE_PATTERN = MoebooruExtractor.update({
"yandere": { "yandere": {
"root": "https://yande.re", "root": "https://yande.re",
"pattern": r"yande\.re",
}, },
"konachan": { "konachan": {
"root": "https://konachan.com", "root": "https://konachan.com",
@@ -61,6 +62,7 @@ BASE_PATTERN = MoebooruExtractor.update({
}, },
"hypnohub": { "hypnohub": {
"root": "https://hypnohub.net", "root": "https://hypnohub.net",
"pattern": r"hypnohub\.net",
}, },
"sakugabooru": { "sakugabooru": {
"root": "https://www.sakugabooru.com", "root": "https://www.sakugabooru.com",
@@ -68,6 +70,7 @@ BASE_PATTERN = MoebooruExtractor.update({
}, },
"lolibooru": { "lolibooru": {
"root": "https://lolibooru.moe", "root": "https://lolibooru.moe",
"pattern": r"lolibooru\.moe",
}, },
}) })

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2021 Mike Fährmann # Copyright 2021-2022 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@@ -58,12 +58,21 @@ class PhilomenaExtractor(BooruExtractor):
INSTANCES = { INSTANCES = {
"derpibooru": {"root": "https://derpibooru.org", "derpibooru": {
"filter_id": "56027"}, "root": "https://derpibooru.org",
"ponybooru" : {"root": "https://ponybooru.org", "pattern": r"derpibooru\.org",
"filter_id": "2"}, "filter_id": "56027",
"furbooru" : {"root": "https://furbooru.org", },
"filter_id": "2"}, "ponybooru": {
"root": "https://ponybooru.org",
"pattern": r"ponybooru\.org",
"filter_id": "2",
},
"furbooru": {
"root": "https://furbooru.org",
"pattern": r"furbooru\.org",
"filter_id": "2",
},
} }
BASE_PATTERN = PhilomenaExtractor.update(INSTANCES) BASE_PATTERN = PhilomenaExtractor.update(INSTANCES)
@@ -239,5 +248,5 @@ class PhilomenaGalleryExtractor(PhilomenaExtractor):
def posts(self): def posts(self):
gallery_id = "gallery_id:" + self.gallery_id gallery_id = "gallery_id:" + self.gallery_id
url = self.root + "/api/v1/json/search/images" url = self.root + "/api/v1/json/search/images"
params = {"sd": "desc", "sf": gallery_id, "q" : gallery_id} params = {"sd": "desc", "sf": gallery_id, "q": gallery_id}
return self._pagination(url, params) return self._pagination(url, params)

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2019-2021 Mike Fährmann # Copyright 2019-2022 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@@ -160,6 +160,7 @@ BASE_PATTERN = ReactorExtractor.update({
}, },
"thatpervert": { "thatpervert": {
"root": "http://thatpervert.com", "root": "http://thatpervert.com",
"pattern": r"thatpervert\.com",
}, },
}) })

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2019-2021 Mike Fährmann # Copyright 2019-2022 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@@ -49,9 +49,11 @@ BASE_PATTERN = ShopifyExtractor.update({
}, },
"omgmiamiswimwear": { "omgmiamiswimwear": {
"root": "https://www.omgmiamiswimwear.com", "root": "https://www.omgmiamiswimwear.com",
"pattern": r"(?:www\.)?omgmiamiswimwear\.com",
}, },
"windsorstore": { "windsorstore": {
"root": "https://www.windsorstore.com", "root": "https://www.windsorstore.com",
"pattern": r"(?:www\.)?windsorstore\.com",
}, },
"loungeunderwear": { "loungeunderwear": {
"root": "https://loungeunderwear.com", "root": "https://loungeunderwear.com",