[idolcomplex] fix extraction & update URL patterns (#5002)
This commit is contained in:
@@ -34,8 +34,11 @@ class IdolcomplexExtractor(SankakuExtractor):
|
|||||||
self.start_post = 0
|
self.start_post = 0
|
||||||
|
|
||||||
def _init(self):
|
def _init(self):
|
||||||
|
self.find_pids = re.compile(
|
||||||
|
r" href=[\"#]/\w\w/posts/([0-9a-f]+)"
|
||||||
|
).findall
|
||||||
self.find_tags = re.compile(
|
self.find_tags = re.compile(
|
||||||
r'tag-type-([^"]+)">\s*<div [^>]+>\s*<a href="/\?tags=([^"]+)'
|
r'tag-type-([^"]+)">\s*<a [^>]*?href="/[^?]*\?tags=([^"]+)'
|
||||||
).findall
|
).findall
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
@@ -149,8 +152,8 @@ class IdolcomplexTagExtractor(IdolcomplexExtractor):
|
|||||||
subcategory = "tag"
|
subcategory = "tag"
|
||||||
directory_fmt = ("{category}", "{search_tags}")
|
directory_fmt = ("{category}", "{search_tags}")
|
||||||
archive_fmt = "t_{search_tags}_{id}"
|
archive_fmt = "t_{search_tags}_{id}"
|
||||||
pattern = r"(?:https?://)?idol\.sankakucomplex\.com/\?([^#]*)"
|
pattern = BASE_PATTERN + r"/(?:posts/?)?\?([^#]*)"
|
||||||
example = "https://idol.sankakucomplex.com/?tags=TAGS"
|
example = "https://idol.sankakucomplex.com/en/posts?tags=TAGS"
|
||||||
per_page = 20
|
per_page = 20
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
@@ -196,7 +199,8 @@ class IdolcomplexTagExtractor(IdolcomplexExtractor):
|
|||||||
page = self.request(self.root, params=params, retries=10).text
|
page = self.request(self.root, params=params, retries=10).text
|
||||||
pos = ((page.find('id="more-popular-posts-link"') + 1) or
|
pos = ((page.find('id="more-popular-posts-link"') + 1) or
|
||||||
(page.find('<span class="thumb') + 1))
|
(page.find('<span class="thumb') + 1))
|
||||||
yield from text.extract_iter(page, ' href="/posts/', '"', pos)
|
|
||||||
|
yield from self.find_pids(page, pos)
|
||||||
|
|
||||||
next_url = text.extract(page, 'next-page-url="', '"', pos)[0]
|
next_url = text.extract(page, 'next-page-url="', '"', pos)[0]
|
||||||
if not next_url:
|
if not next_url:
|
||||||
@@ -218,7 +222,7 @@ class IdolcomplexPoolExtractor(IdolcomplexExtractor):
|
|||||||
subcategory = "pool"
|
subcategory = "pool"
|
||||||
directory_fmt = ("{category}", "pool", "{pool}")
|
directory_fmt = ("{category}", "pool", "{pool}")
|
||||||
archive_fmt = "p_{pool}_{id}"
|
archive_fmt = "p_{pool}_{id}"
|
||||||
pattern = r"(?:https?://)?idol\.sankakucomplex\.com/pools?/show/(\d+)"
|
pattern = BASE_PATTERN + r"/pools?/show/(\d+)"
|
||||||
example = "https://idol.sankakucomplex.com/pools/show/12345"
|
example = "https://idol.sankakucomplex.com/pools/show/12345"
|
||||||
per_page = 24
|
per_page = 24
|
||||||
|
|
||||||
@@ -242,8 +246,7 @@ class IdolcomplexPoolExtractor(IdolcomplexExtractor):
|
|||||||
while True:
|
while True:
|
||||||
page = self.request(url, params=params, retries=10).text
|
page = self.request(url, params=params, retries=10).text
|
||||||
pos = page.find('id="pool-show"') + 1
|
pos = page.find('id="pool-show"') + 1
|
||||||
post_ids = list(text.extract_iter(
|
post_ids = self.find_pids(page, pos)
|
||||||
page, ' href="/posts/', '"', pos))
|
|
||||||
|
|
||||||
yield from post_ids
|
yield from post_ids
|
||||||
if len(post_ids) < self.per_page:
|
if len(post_ids) < self.per_page:
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ from gallery_dl.extractor import idolcomplex
|
|||||||
|
|
||||||
__tests__ = (
|
__tests__ = (
|
||||||
{
|
{
|
||||||
"#url" : "https://idol.sankakucomplex.com/?tags=lyumos",
|
"#url" : "https://idol.sankakucomplex.com/en/posts?tags=lyumos",
|
||||||
"#category": ("booru", "idolcomplex", "tag"),
|
"#category": ("booru", "idolcomplex", "tag"),
|
||||||
"#class" : idolcomplex.IdolcomplexTagExtractor,
|
"#class" : idolcomplex.IdolcomplexTagExtractor,
|
||||||
"#pattern" : r"https://i[sv]\.sankakucomplex\.com/data/[^/]{2}/[^/]{2}/[^/]{32}\.\w+\?e=\d+&m=[^&#]+",
|
"#pattern" : r"https://i[sv]\.sankakucomplex\.com/data/[^/]{2}/[^/]{2}/[^/]{32}\.\w+\?e=\d+&m=[^&#]+",
|
||||||
@@ -17,6 +17,24 @@ __tests__ = (
|
|||||||
"#count" : 5,
|
"#count" : 5,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://idol.sankakucomplex.com/posts/?tags=lyumos",
|
||||||
|
"#category": ("booru", "idolcomplex", "tag"),
|
||||||
|
"#class" : idolcomplex.IdolcomplexTagExtractor,
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://idol.sankakucomplex.com/en/?tags=lyumos",
|
||||||
|
"#category": ("booru", "idolcomplex", "tag"),
|
||||||
|
"#class" : idolcomplex.IdolcomplexTagExtractor,
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://idol.sankakucomplex.com/?tags=lyumos",
|
||||||
|
"#category": ("booru", "idolcomplex", "tag"),
|
||||||
|
"#class" : idolcomplex.IdolcomplexTagExtractor,
|
||||||
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://idol.sankakucomplex.com/?tags=lyumos+wreath&page=3&next=694215",
|
"#url" : "https://idol.sankakucomplex.com/?tags=lyumos+wreath&page=3&next=694215",
|
||||||
"#category": ("booru", "idolcomplex", "tag"),
|
"#category": ("booru", "idolcomplex", "tag"),
|
||||||
@@ -30,6 +48,12 @@ __tests__ = (
|
|||||||
"#count" : 3,
|
"#count" : 3,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://idol.sankakucomplex.com/en/pools/show/145",
|
||||||
|
"#category": ("booru", "idolcomplex", "pool"),
|
||||||
|
"#class" : idolcomplex.IdolcomplexPoolExtractor,
|
||||||
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://idol.sankakucomplex.com/pool/show/145",
|
"#url" : "https://idol.sankakucomplex.com/pool/show/145",
|
||||||
"#category": ("booru", "idolcomplex", "pool"),
|
"#category": ("booru", "idolcomplex", "pool"),
|
||||||
@@ -37,7 +61,7 @@ __tests__ = (
|
|||||||
},
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://idol.sankakucomplex.com/en/posts/show/509eccbba54a43cea6b275a65b93c51d",
|
"#url" : "https://idol.sankakucomplex.com/en/posts/509eccbba54a43cea6b275a65b93c51d",
|
||||||
"#category": ("booru", "idolcomplex", "post"),
|
"#category": ("booru", "idolcomplex", "post"),
|
||||||
"#class" : idolcomplex.IdolcomplexPostExtractor,
|
"#class" : idolcomplex.IdolcomplexPostExtractor,
|
||||||
"#sha1_content": "694ec2491240787d75bf5d0c75d0082b53a85afd",
|
"#sha1_content": "694ec2491240787d75bf5d0c75d0082b53a85afd",
|
||||||
@@ -45,7 +69,7 @@ __tests__ = (
|
|||||||
"created_at" : "2017-11-24 17:01:27.696",
|
"created_at" : "2017-11-24 17:01:27.696",
|
||||||
"date" : "dt:2017-11-24 17:01:27",
|
"date" : "dt:2017-11-24 17:01:27",
|
||||||
"extension" : "jpg",
|
"extension" : "jpg",
|
||||||
"file_url" : r"re:https://is\.sankakucomplex\.com/data/50/9e/509eccbba54a43cea6b275a65b93c51d\.jpg\?",
|
"file_url" : r"re:https://i[sv]\.sankakucomplex\.com/data/50/9e/509eccbba54a43cea6b275a65b93c51d\.jpg\?",
|
||||||
"filename" : "509eccbba54a43cea6b275a65b93c51d",
|
"filename" : "509eccbba54a43cea6b275a65b93c51d",
|
||||||
"height" : 683,
|
"height" : 683,
|
||||||
"id" : 694215,
|
"id" : 694215,
|
||||||
@@ -62,6 +86,12 @@ __tests__ = (
|
|||||||
"width" : 1024,
|
"width" : 1024,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://idol.sankakucomplex.com/en/posts/show/509eccbba54a43cea6b275a65b93c51d",
|
||||||
|
"#category": ("booru", "idolcomplex", "post"),
|
||||||
|
"#class" : idolcomplex.IdolcomplexPostExtractor,
|
||||||
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://idol.sankakucomplex.com/posts/509eccbba54a43cea6b275a65b93c51d",
|
"#url" : "https://idol.sankakucomplex.com/posts/509eccbba54a43cea6b275a65b93c51d",
|
||||||
"#category": ("booru", "idolcomplex", "post"),
|
"#category": ("booru", "idolcomplex", "post"),
|
||||||
|
|||||||
Reference in New Issue
Block a user