From 05dbc51d3e574258f49cd4159f9581cf64275fa4 Mon Sep 17 00:00:00 2001
From: Bastian Kleineidam
Date: Thu, 25 Apr 2013 22:40:06 +0200
Subject: [PATCH] Detect completed end-of-life comics.

---
 doc/changelog.txt    |  7 +++++--
 dosage               |  6 ++++++
 dosagelib/scraper.py | 23 ++++++++++++++++++++++-
 3 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/doc/changelog.txt b/doc/changelog.txt
index 2cc4465e8..6d4216865 100644
--- a/doc/changelog.txt
+++ b/doc/changelog.txt
@@ -3,8 +3,10 @@ Dosage 2.2 (released xx.xx.2013)
 Features:
 - comic: Added ARedTailsDream, Carciphona, Curtailed, GirlGenius,
   Lackadaisy, SabrinaOnline, TheDreamlandChronicles, TwoGuysAndGuy
-  and Unsound. Patch by Dirk Reiners.
+  and Unsound. Patches by Dirk Reiners. Closes: GH bug #29
+- comic: Comics which are not updated anymore can now be marked.
+  Closes: GH bug #30
 
 Changes:
 - cmdline: Ignore trailing '/' at end of comic names. Useful
diff --git a/dosage b/dosage
index 2f3a37b21..e87c9797f 100755
--- a/dosage
+++ b/dosage
@@ -240,6 +240,9 @@ def getStrips(scraperobj, options):
         numstrips = 1
     out.context = scraperobj.getName()
     try:
+        if scraperobj.isComplete(options.basepath):
+            out.info("All comics are already downloaded.")
+            return 0
         for strip in scraperobj.getStrips(numstrips):
             _errors, skipped = saveComicStrip(strip, options.basepath, options.dry_run)
             errors += _errors
@@ -247,6 +250,9 @@
                 # stop when retrieval skipped an image for one comic strip
                 out.info("Stop retrieval because image file already exists")
                 break
+        if options.all and not (errors or options.dry_run or
+                options.cont or scraperobj.indexes):
+            scraperobj.setComplete(options.basepath)
     except Exception as msg:
         out.exception(msg)
         errors += 1
diff --git a/dosagelib/scraper.py b/dosagelib/scraper.py
index 00619c96d..e6b3b0858 100644
--- a/dosagelib/scraper.py
+++ b/dosagelib/scraper.py
@@ -3,9 +3,10 @@
 # Copyright (C) 2012-2013 Bastian Kleineidam
 import requests
 import time
+import os
 from . import loader, configuration
 from .util import (fetchUrl, fetchUrls, getPageContent, makeSequence,
-    get_system_uid, urlopen)
+    get_system_uid, urlopen, getDirname)
 from .comic import ComicStrip
 from .output import out
 from .events import getHandler
@@ -45,6 +46,9 @@ class _BasicScraper(object):
     # set to True if this comic contains adult content
     adult = False
 
+    # set to True if this comic will not get updated anymore
+    endOfLife = False
+
     # a description of the comic contents
     description = ''
 
@@ -237,6 +241,23 @@ class _BasicScraper(object):
         page = urlopen(url, cls.session, data=data)
         return page.text
 
+    def getCompleteFile(self, basepath):
+        """Get filename indicating all comics are downloaded."""
+        dirname = getDirname(self.getName())
+        return os.path.join(basepath, dirname, "complete.txt")
+
+    def isComplete(self, basepath):
+        """Check if all comics are downloaded."""
+        return os.path.isfile(self.getCompleteFile(basepath))
+
+    def setComplete(self, basepath):
+        """Set complete flag for this comic, ie. all comics are downloaded."""
+        if self.endOfLife:
+            filename = self.getCompleteFile(basepath)
+            if not os.path.exists(filename):
+                with open(filename, 'w') as f:
+                    f.write('All comics should be downloaded here.')
+
 
 def find_scraperclasses(comic, multiple_allowed=False):
     """Get a list comic scraper classes. Can return more than one entries if
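
For context, a minimal standalone sketch (not part of the patch) of how the completion marker introduced above is meant to behave: a scraper whose comic has reached end of life sets endOfLife, a clean full download writes a complete.txt marker into the comic's directory via setComplete(), and later runs skip the comic because isComplete() finds that file. The ExampleScraper class, the "Comics" base path, and the direct use of getName() for the directory name are illustrative simplifications; the real code goes through getDirname() and dosage's option handling.

# Illustrative sketch only; ExampleScraper and its paths are hypothetical.
import os


class ExampleScraper(object):
    # mirrors _BasicScraper.endOfLife: True means no new strips will appear
    endOfLife = True

    def getName(self):
        return "SomeFinishedComic"

    def getCompleteFile(self, basepath):
        """Return the path of the marker file for this comic."""
        # the real patch sanitizes the name with getDirname() first
        return os.path.join(basepath, self.getName(), "complete.txt")

    def isComplete(self, basepath):
        """True if a previous run marked this comic as fully downloaded."""
        return os.path.isfile(self.getCompleteFile(basepath))

    def setComplete(self, basepath):
        """Write the marker file, but only for end-of-life comics."""
        if self.endOfLife:
            filename = self.getCompleteFile(basepath)
            if not os.path.exists(filename):
                with open(filename, 'w') as f:
                    f.write('All comics should be downloaded here.')


if __name__ == '__main__':
    scraper = ExampleScraper()
    basepath = 'Comics'  # any download directory
    comicdir = os.path.join(basepath, scraper.getName())
    if not os.path.isdir(comicdir):
        os.makedirs(comicdir)
    print(scraper.isComplete(basepath))  # False on the first run
    scraper.setComplete(basepath)        # marker written because endOfLife is True
    print(scraper.isComplete(basepath))  # True, so a later run would skip the comic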