From 64c8e502ca87c647be25a487848c0b9e2e3d54e7 Mon Sep 17 00:00:00 2001 From: Tobias Gruetzmacher Date: Sun, 5 Jun 2016 23:55:54 +0200 Subject: [PATCH] Ignore case for comic download directories. Since we already match comics case-insensitive on the command line, this was a logical step, even if this means changing quite a bit of code that all tries to resolve the "comic directory" in a slightly different way... --- dosagelib/comic.py | 29 +++++++--------- dosagelib/director.py | 16 +++++---- dosagelib/events.py | 77 ++++++++++++++++++++++++------------------- dosagelib/scraper.py | 29 ++++++++++++---- dosagelib/util.py | 5 --- 5 files changed, 89 insertions(+), 67 deletions(-) diff --git a/dosagelib/comic.py b/dosagelib/comic.py index 9a30ee5a5..5a4605f3d 100755 --- a/dosagelib/comic.py +++ b/dosagelib/comic.py @@ -12,7 +12,7 @@ import contextlib from datetime import datetime from .output import out -from .util import unquote, getDirname, getFilename, urlopen, strsize +from .util import unquote, getFilename, urlopen, strsize from .events import getHandler @@ -25,13 +25,11 @@ RFC_1123_DT_STR = "%a, %d %b %Y %H:%M:%S GMT" class ComicStrip(object): """A list of comic image URLs.""" - def __init__(self, name, strip_url, image_urls, namer, session, text=None): + def __init__(self, scraper, strip_url, image_urls, text=None): """Store the image URL list.""" - self.name = name + self.scraper = scraper self.strip_url = strip_url self.image_urls = image_urls - self.namer = namer - self.session = session self.text = text def getImages(self): @@ -41,12 +39,11 @@ class ComicStrip(object): def getDownloader(self, url): """Get an image downloader.""" - filename = self.namer(url, self.strip_url) + filename = self.scraper.namer(url, self.strip_url) if filename is None: filename = url.rsplit('/', 1)[1] - dirname = getDirname(self.name) - return ComicImage(self.name, url, self.strip_url, dirname, filename, - self.session, text=self.text) + return ComicImage(self.scraper, url, self.strip_url, filename, + text=self.text) class ComicImage(object): @@ -54,16 +51,13 @@ class ComicImage(object): ChunkBytes = 1024 * 100 # 100KB - def __init__(self, name, url, referrer, dirname, filename, session, - text=None): + def __init__(self, scraper, url, referrer, filename, text=None): """Set URL and filename.""" - self.name = name + self.scraper = scraper self.referrer = referrer self.url = url - self.dirname = dirname filename = getFilename(filename) self.filename, self.ext = os.path.splitext(filename) - self.session = session self.text = text def connect(self, lastchange=None): @@ -71,7 +65,8 @@ class ComicImage(object): headers = {} if lastchange: headers['If-Modified-Since'] = lastchange.strftime(RFC_1123_DT_STR) - self.urlobj = urlopen(self.url, self.session, referrer=self.referrer, + self.urlobj = urlopen(self.url, self.scraper.session, + referrer=self.referrer, max_content_bytes=MaxImageBytes, stream=True, headers=headers) if self.urlobj.status_code == 304: # Not modified @@ -97,7 +92,7 @@ class ComicImage(object): def save(self, basepath): """Save comic URL to filename on disk.""" - comicdir = os.path.join(basepath, self.dirname) + comicdir = self.scraper.get_download_dir(basepath) if not os.path.isdir(comicdir): os.makedirs(comicdir) fnbase = os.path.join(comicdir, self.filename) @@ -125,7 +120,7 @@ class ComicImage(object): out.debug(u'Writing comic text to file %s...' % fntext) with self.fileout(fntext, encoding='utf-8') as f: f.write(self.text) - getHandler().comicDownloaded(self, fn, text=self.text) + getHandler().comicDownloaded(self, fn) return fn, True @contextlib.contextmanager diff --git a/dosagelib/director.py b/dosagelib/director.py index e85fba1c8..6cd5583f8 100644 --- a/dosagelib/director.py +++ b/dosagelib/director.py @@ -13,7 +13,6 @@ from six.moves.urllib.parse import urlparse from .output import out from . import events, scraper -from .util import getDirname class ComicQueue(Queue): @@ -196,11 +195,8 @@ def getScrapers(comics, basepath=None, adult=True, multiple_allowed=False, listi # only scrapers whose directory already exists if len(comics) > 1: out.warn(u"using '@' as comic name ignores all other specified comics.") - for scraperobj in scraper.get_scrapers(include_removed=True): - dirname = getDirname(scraperobj.name) - if os.path.isdir(os.path.join(basepath, dirname)): - if shouldRunScraper(scraperobj, adult, listing): - yield scraperobj + for comic in get_existing_comics(basepath, adult, listing): + yield comic else: # get only selected comic scrapers # store them in a set to eliminate duplicates @@ -228,6 +224,14 @@ def getScrapers(comics, basepath=None, adult=True, multiple_allowed=False, listi yield scraperobj +def get_existing_comics(basepath=None, adult=True, listing=False): + for scraperobj in scraper.get_scrapers(include_removed=True): + dirname = scraperobj.get_download_dir(basepath) + if os.path.isdir(dirname): + if shouldRunScraper(scraperobj, adult, listing): + yield scraperobj + + def shouldRunScraper(scraperobj, adult=True, listing=False): if listing: return True diff --git a/dosagelib/events.py b/dosagelib/events.py index 3b2ad4176..9b70e5c7c 100644 --- a/dosagelib/events.py +++ b/dosagelib/events.py @@ -48,12 +48,23 @@ class EventHandler(object): """Emit a start event. Should be overridden in subclass.""" pass - def comicDownloaded(self, comic, filename, text=None): - """Emit a comic downloaded event. Should be overridden in subclass.""" + def comicDownloaded(self, comic, filename): + """Emit a comic downloaded event. Should be overridden in subclass. + Parameters are: + + comic: The ComicImage class calling this event + filename: The target filename + """ pass - def comicPageLink(self, comic, url, prevUrl): - """Emit an event to inform the handler about links between comic pages. Should be overridden in subclass.""" + def comicPageLink(self, scraper, url, prevUrl): + """Emit an event to inform the handler about links between comic pages. + Should be overridden in subclass. Parameters are: + + scraper: The Scraper class calling this event + url: The current page url + prevUrl: The previous page url + """ pass def end(self): @@ -88,20 +99,20 @@ class RSSEventHandler(EventHandler): self.newfile = True self.rss = rss.Feed('Daily Dosage', link, 'Comics for %s' % time.strftime('%Y/%m/%d', today)) - def comicDownloaded(self, comic, filename, text=None): + def comicDownloaded(self, comic, filename): """Write RSS entry for downloaded comic.""" imageUrl = self.getUrlFromFilename(filename) size = None if self.allowdownscale: size = getDimensionForImage(filename, MaxImageSize) - title = '%s - %s' % (comic.name, os.path.basename(filename)) + title = '%s - %s' % (comic.scraper.name, os.path.basename(filename)) pageUrl = comic.referrer description = '\n') if text: self.html.write(u'
%s\n' % text) - self.lastComic = comic.name + self.lastComic = comic.scraper.name self.lastUrl = pageUrl def newComic(self, comic): @@ -226,7 +237,7 @@ class HtmlEventHandler(EventHandler): self.html.write(u'\n') if self.lastComic is not None: self.html.write(u'\n') - self.html.write(u'
  • %s
  • \n' % comic.name) + self.html.write(u'
  • %s
  • \n' % comic.scraper.name) self.html.write(u'