From 8d84361de4cc8d08272f84c58425057a602cf7d4 Mon Sep 17 00:00:00 2001 From: sizlo Date: Tue, 18 Apr 2017 21:58:12 +0100 Subject: [PATCH 1/4] Preserve the order we found images in when removing duplicate images --- dosagelib/scraper.py | 4 ++-- dosagelib/util.py | 9 +++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/dosagelib/scraper.py b/dosagelib/scraper.py index 21099540b..457d908f8 100644 --- a/dosagelib/scraper.py +++ b/dosagelib/scraper.py @@ -27,7 +27,7 @@ except ImportError: from . import loader, configuration, languages from .util import (get_page, makeSequence, get_system_uid, urlopen, unescape, tagre, normaliseURL, prettyMatcherList, - requests_session) + requests_session, uniq) from .comic import ComicStrip from .output import out from .events import getHandler @@ -137,7 +137,7 @@ class Scraper(object): # map modifier function on image URLs imageUrls = [self.imageUrlModifier(x, data) for x in imageUrls] # remove duplicate URLs - imageUrls = set(imageUrls) + imageUrls = uniq(imageUrls) if len(imageUrls) > 1 and not self.multipleImagesPerStrip: out.warn( u"Found %d images instead of 1 at %s with expressions %s" % diff --git a/dosagelib/util.py b/dosagelib/util.py index 35086dfe3..001a52b3a 100644 --- a/dosagelib/util.py +++ b/dosagelib/util.py @@ -522,3 +522,12 @@ def strlimit(s, length=72): if length == 0: return "" return "%s..." 
% s[:length] + + +def uniq(input): +    """Remove duplicates from a list while preserving the list order""" +    output = [] +    for item in input: +        if item not in output: +            output.append(item) +    return output From a83911aa67dcd0e921a291f7c7dcfc7c08d2ef7f Mon Sep 17 00:00:00 2001 From: sizlo Date: Tue, 18 Apr 2017 21:59:04 +0100 Subject: [PATCH 2/4] Favour the first image we found when we're not expecting multiple images --- dosagelib/scraper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dosagelib/scraper.py b/dosagelib/scraper.py index 457d908f8..a14fdd9f1 100644 --- a/dosagelib/scraper.py +++ b/dosagelib/scraper.py @@ -142,7 +142,7 @@ class Scraper(object): out.warn( u"Found %d images instead of 1 at %s with expressions %s" % (len(imageUrls), url, prettyMatcherList(self.imageSearch))) - image = sorted(imageUrls)[0] + image = imageUrls[0] out.warn(u"Choosing image %s" % image) imageUrls = (image,) elif not imageUrls: From 95e48b8d8dc6fbeda665fe2e01e694713547efc8 Mon Sep 17 00:00:00 2001 From: Tim Brier Date: Wed, 19 Apr 2017 08:50:06 +0100 Subject: [PATCH 3/4] Keep track of the order of images for multi-image strips in the JSON output --- dosagelib/events.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/dosagelib/events.py b/dosagelib/events.py index 71d4a8f46..f19d2b19b 100644 --- a/dosagelib/events.py +++ b/dosagelib/events.py @@ -287,6 +287,13 @@ class JSONEventHandler(EventHandler): def comicDownloaded(self, comic, filename): """Add URL-to-filename mapping into JSON.""" pageInfo = self.getPageInfo(comic.scraper, comic.referrer) + + # If there's already an image for this page start keeping track of their order + if len(pageInfo['images'].keys()) == 1: + pageInfo['imagesOrder'] = [list(pageInfo['images'].keys())[0]] + if 'imagesOrder' in pageInfo.keys(): + pageInfo['imagesOrder'].append(comic.url) + pageInfo['images'][comic.url] = os.path.basename(filename) def comicPageLink(self, scraper, url, prevUrl): From 
08d7c55621944f376c0e258273ef69af567a79ce Mon Sep 17 00:00:00 2001 From: Tim Brier Date: Wed, 19 Apr 2017 08:51:43 +0100 Subject: [PATCH 4/4] Obey the ordering of multi image strips given in the JSON --- scripts/order-symlinks.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/scripts/order-symlinks.py b/scripts/order-symlinks.py index 220ef2a99..0266fb60d 100755 --- a/scripts/order-symlinks.py +++ b/scripts/order-symlinks.py @@ -57,7 +57,11 @@ def create_symlinks(d): order = [] work = latest while work in data["pages"]: - order.extend(data["pages"][work]["images"].values()) + if "imagesOrder" in data["pages"][work].keys(): + for url in reversed(data["pages"][work]["imagesOrder"]): + order.append(data["pages"][work]["images"][url]) + else: + order.extend(data["pages"][work]["images"].values()) if "prev" in data["pages"][work]: work = data["pages"][work]["prev"] else: