From 8d84361de4cc8d08272f84c58425057a602cf7d4 Mon Sep 17 00:00:00 2001
From: sizlo
Date: Tue, 18 Apr 2017 21:58:12 +0100
Subject: [PATCH] Preserve the order we found images in when removing duplicate images

---
 dosagelib/scraper.py | 4 ++--
 dosagelib/util.py    | 9 +++++++++
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/dosagelib/scraper.py b/dosagelib/scraper.py
index 21099540b..457d908f8 100644
--- a/dosagelib/scraper.py
+++ b/dosagelib/scraper.py
@@ -27,7 +27,7 @@ except ImportError:
 from . import loader, configuration, languages
 from .util import (get_page, makeSequence, get_system_uid, urlopen,
                    unescape, tagre, normaliseURL, prettyMatcherList,
-                   requests_session)
+                   requests_session, uniq)
 from .comic import ComicStrip
 from .output import out
 from .events import getHandler
@@ -137,7 +137,7 @@ class Scraper(object):
         # map modifier function on image URLs
         imageUrls = [self.imageUrlModifier(x, data) for x in imageUrls]
         # remove duplicate URLs
-        imageUrls = set(imageUrls)
+        imageUrls = uniq(imageUrls)
         if len(imageUrls) > 1 and not self.multipleImagesPerStrip:
             out.warn(
                 u"Found %d images instead of 1 at %s with expressions %s" %
diff --git a/dosagelib/util.py b/dosagelib/util.py
index 35086dfe3..001a52b3a 100644
--- a/dosagelib/util.py
+++ b/dosagelib/util.py
@@ -522,3 +522,12 @@ def strlimit(s, length=72):
     if length == 0:
         return ""
     return "%s..." % s[:length]
+
+
+def uniq(input):
+    """Return a new list without duplicate items, keeping first-seen order."""
+    output = []
+    for item in input:
+        if item not in output:
+            output.append(item)
+    return output