From bff71cde80e58c1447d8d9e6c65cae75ae139489 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Tue, 2 Mar 2021 23:01:38 +0100 Subject: [PATCH] implement 'util.unique_sequence()' --- gallery_dl/extractor/sankakucomplex.py | 11 ++++------- gallery_dl/util.py | 11 ++++++++++- test/test_util.py | 12 +++++++++++- 3 files changed, 25 insertions(+), 9 deletions(-) diff --git a/gallery_dl/extractor/sankakucomplex.py b/gallery_dl/extractor/sankakucomplex.py index 972750c2..5d832993 100644 --- a/gallery_dl/extractor/sankakucomplex.py +++ b/gallery_dl/extractor/sankakucomplex.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2019-2020 Mike Fährmann +# Copyright 2019-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -125,17 +125,14 @@ class SankakucomplexTagExtractor(SankakucomplexExtractor): def items(self): pnum = 1 - last = None data = {"_extractor": SankakucomplexArticleExtractor} - yield Message.Version, 1 while True: url = "{}/{}/page/{}/".format(self.root, self.path, pnum) response = self.request(url, fatal=False) if response.status_code >= 400: return - for url in text.extract_iter(response.text, 'data-direct="', '"'): - if url != last: - last = url - yield Message.Queue, url, data + for url in util.unique_sequence(text.extract_iter( + response.text, 'data-direct="', '"')): + yield Message.Queue, url, data pnum += 1 diff --git a/gallery_dl/util.py b/gallery_dl/util.py index a858079c..2466adf7 100644 --- a/gallery_dl/util.py +++ b/gallery_dl/util.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2017-2020 Mike Fährmann +# Copyright 2017-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -65,6 +65,15 @@ def unique(iterable): yield element +def unique_sequence(iterable): + """Yield sequentially unique elements from 
'iterable'""" + last = None + for element in iterable: + if element != last: + last = element + yield element + + def raises(cls): """Returns a function that raises 'cls' as exception""" def wrap(*args): diff --git a/test/test_util.py b/test/test_util.py index 8848ea0d..06de7358 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -# Copyright 2015-2020 Mike Fährmann +# Copyright 2015-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -461,6 +461,16 @@ class TestOther(unittest.TestCase): self.assertSequenceEqual( list(util.unique([1, 2, 1, 3, 2, 1])), [1, 2, 3]) + def test_unique_sequence(self): + self.assertSequenceEqual( + list(util.unique_sequence("")), "") + self.assertSequenceEqual( + list(util.unique_sequence("AABBCC")), "ABC") + self.assertSequenceEqual( + list(util.unique_sequence("ABABABCAABBCC")), "ABABABCABC") + self.assertSequenceEqual( + list(util.unique_sequence([1, 2, 1, 3, 2, 1])), [1, 2, 1, 3, 2, 1]) + def test_raises(self): func = util.raises(Exception) with self.assertRaises(Exception):