diff --git a/dosagelib/events.py b/dosagelib/events.py
index 43a6e1ff7..cf611b37a 100644
--- a/dosagelib/events.py
+++ b/dosagelib/events.py
@@ -287,6 +287,13 @@ class JSONEventHandler(EventHandler):
     def comicDownloaded(self, comic, filename):
         """Add URL-to-filename mapping into JSON."""
         pageInfo = self.getPageInfo(comic.scraper, comic.referrer)
+
+        # If there's already an image for this page start keeping track of their order
+        if len(pageInfo['images']) == 1:
+            pageInfo['imagesOrder'] = [next(iter(pageInfo['images']))]
+        if 'imagesOrder' in pageInfo:
+            pageInfo['imagesOrder'].append(comic.url)
+
         pageInfo['images'][comic.url] = os.path.basename(filename)
 
     def comicPageLink(self, scraper, url, prevUrl):
diff --git a/dosagelib/scraper.py b/dosagelib/scraper.py
index 21099540b..a14fdd9f1 100644
--- a/dosagelib/scraper.py
+++ b/dosagelib/scraper.py
@@ -27,7 +27,7 @@ except ImportError:
 from . import loader, configuration, languages
 from .util import (get_page, makeSequence, get_system_uid, urlopen,
                    unescape, tagre, normaliseURL, prettyMatcherList,
-                   requests_session)
+                   requests_session, uniq)
 from .comic import ComicStrip
 from .output import out
 from .events import getHandler
@@ -137,12 +137,12 @@ class Scraper(object):
         # map modifier function on image URLs
         imageUrls = [self.imageUrlModifier(x, data) for x in imageUrls]
         # remove duplicate URLs
-        imageUrls = set(imageUrls)
+        imageUrls = uniq(imageUrls)
         if len(imageUrls) > 1 and not self.multipleImagesPerStrip:
             out.warn(
                 u"Found %d images instead of 1 at %s with expressions %s" %
                 (len(imageUrls), url, prettyMatcherList(self.imageSearch)))
-            image = sorted(imageUrls)[0]
+            image = imageUrls[0]
             out.warn(u"Choosing image %s" % image)
             imageUrls = (image,)
         elif not imageUrls:
diff --git a/dosagelib/util.py b/dosagelib/util.py
index 35086dfe3..001a52b3a 100644
--- a/dosagelib/util.py
+++ b/dosagelib/util.py
@@ -522,3 +522,12 @@ def strlimit(s, length=72):
     if length == 0:
         return ""
     return "%s..." % s[:length]
+
+
+def uniq(input):
+    """Remove duplicates from a list while preserving the list order"""
+    output = []
+    for item in input:
+        if item not in output:
+            output.append(item)
+    return output
diff --git a/scripts/order-symlinks.py b/scripts/order-symlinks.py
index 220ef2a99..0266fb60d 100755
--- a/scripts/order-symlinks.py
+++ b/scripts/order-symlinks.py
@@ -57,7 +57,11 @@ def create_symlinks(d):
     order = []
     work = latest
     while work in data["pages"]:
-        order.extend(data["pages"][work]["images"].values())
+        if "imagesOrder" in data["pages"][work]:
+            for url in reversed(data["pages"][work]["imagesOrder"]):
+                order.append(data["pages"][work]["images"][url])
+        else:
+            order.extend(data["pages"][work]["images"].values())
         if "prev" in data["pages"][work]:
             work = data["pages"][work]["prev"]
         else: