[shopify] use alternate regex for products on collection pages
when the first one doesn't yield any results
This commit is contained in:
@@ -74,21 +74,33 @@ class ShopifyCollectionExtractor(ShopifyExtractor):
|
|||||||
def products(self):
|
def products(self):
|
||||||
params = text.parse_query(self.params)
|
params = text.parse_query(self.params)
|
||||||
params["page"] = text.parse_int(params.get("page"), 1)
|
params["page"] = text.parse_int(params.get("page"), 1)
|
||||||
search_re = re.compile(r"/collections/[\w-]+/products/[\w-]+")
|
fetch = True
|
||||||
|
last = None
|
||||||
|
|
||||||
while True:
|
for pattern in (
|
||||||
page = self.request(self.item_url, params=params).text
|
r"/collections/[\w-]+/products/[\w-]+",
|
||||||
urls = search_re.findall(page)
|
r"href=[\"'](/products/[\w-]+)",
|
||||||
last = None
|
):
|
||||||
|
search_re = re.compile(pattern)
|
||||||
|
|
||||||
if not urls:
|
while True:
|
||||||
return
|
if fetch:
|
||||||
for path in urls:
|
page = self.request(self.item_url, params=params).text
|
||||||
if last == path:
|
urls = search_re.findall(page)
|
||||||
continue
|
|
||||||
last = path
|
if len(urls) < 3:
|
||||||
yield self.root + path
|
if last:
|
||||||
params["page"] += 1
|
return
|
||||||
|
fetch = False
|
||||||
|
break
|
||||||
|
fetch = True
|
||||||
|
|
||||||
|
for path in urls:
|
||||||
|
if last == path:
|
||||||
|
continue
|
||||||
|
last = path
|
||||||
|
yield self.root + path
|
||||||
|
params["page"] += 1
|
||||||
|
|
||||||
|
|
||||||
class ShopifyProductExtractor(ShopifyExtractor):
|
class ShopifyProductExtractor(ShopifyExtractor):
|
||||||
@@ -121,7 +133,6 @@ EXTRACTORS = {
|
|||||||
("https://www.fashionnova.com/collections/mini-dresses/?page=1"),
|
("https://www.fashionnova.com/collections/mini-dresses/?page=1"),
|
||||||
("https://www.fashionnova.com/collections/mini-dresses#1"),
|
("https://www.fashionnova.com/collections/mini-dresses#1"),
|
||||||
),
|
),
|
||||||
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user