From 6574997e01277ae1f5376a8e93d020339269d6fd Mon Sep 17 00:00:00 2001 From: Tobias Gruetzmacher Date: Thu, 21 Apr 2016 23:52:31 +0200 Subject: [PATCH] Refactor: All the other class methods. Turns out, it would have been better if all methods had been instance methods and not class methods. This finishes a big chunk of the rework needed for #42. --- dosagelib/plugins/e.py | 5 ++-- dosagelib/plugins/s.py | 9 +++---- dosagelib/plugins/wlpcomics.py | 10 +++---- dosagelib/scraper.py | 48 ++++++++++++++-------------------- 4 files changed, 29 insertions(+), 43 deletions(-) diff --git a/dosagelib/plugins/e.py b/dosagelib/plugins/e.py index a91517c14..08420e016 100644 --- a/dosagelib/plugins/e.py +++ b/dosagelib/plugins/e.py @@ -20,9 +20,8 @@ class EarthsongSaga(_ParserScraper): prevSearch = '//a[@title="Previous"]' latestSearch = '//div[@id="leftmenu"]/span[1]/a[1]' - @classmethod - def fetchUrls(cls, url, data, urlSearch): - urls = super(EarthsongSaga, cls).fetchUrls(url, data, urlSearch) + def fetchUrls(self, url, data, urlSearch): + urls = super(EarthsongSaga, self).fetchUrls(url, data, urlSearch) return [x.replace('earthsongsaga.com/../', 'earthsongsaga.com/') for x in urls] diff --git a/dosagelib/plugins/s.py b/dosagelib/plugins/s.py index 55d5d825a..416843de0 100644 --- a/dosagelib/plugins/s.py +++ b/dosagelib/plugins/s.py @@ -150,10 +150,9 @@ class ScurryAndCover(_ParserScraper): nextSearch = '//div[@id="nextpage"]/..' 
imageSearch = 'MARKER' - @classmethod - def fetchUrls(cls, url, data, urlSearch): - if urlSearch != cls.imageSearch: - return super(ScurryAndCover, cls).fetchUrls(url, data, urlSearch) + def fetchUrls(self, url, data, urlsearch): + if urlsearch != self.imageSearch: + return super(ScurryAndCover, self).fetchUrls(url, data, urlsearch) # get javascript element and parse a variable value scripts = data.xpath('//body/script[@type="text/javascript"]') @@ -163,7 +162,7 @@ class ScurryAndCover(_ParserScraper): images = regex.findall(script.text) if len(images) > 0: image = images[0] - return [cls.url + '/images/pages/' + image + '-xsmall.png'] + return [self.url + '/images/pages/' + image + '-xsmall.png'] def starter(self): """Go forward as far as possibe, then start.""" diff --git a/dosagelib/plugins/wlpcomics.py b/dosagelib/plugins/wlpcomics.py index 82bfb8512..28ed496cc 100644 --- a/dosagelib/plugins/wlpcomics.py +++ b/dosagelib/plugins/wlpcomics.py @@ -56,10 +56,8 @@ class Stellar(_WLPComics): url = 'http://www.wlpcomics.com/adult/stellar/' adult = True - @classmethod - def fetchUrls(cls, url, data, urlSearch): + def prevUrlModifier(self, prev_url): """Bugfix for empty page...""" - urls = super(Stellar, cls).fetchUrls(url, data, urlSearch) - if cls.url + '075.html' in urls: - urls = [cls.url + '074.html'] - return urls + if prev_url == self.url + '075.html': + return self.url + '074.html' + return prev_url diff --git a/dosagelib/scraper.py b/dosagelib/scraper.py index 200ea460a..54778bb3f 100644 --- a/dosagelib/scraper.py +++ b/dosagelib/scraper.py @@ -280,8 +280,7 @@ class Scraper(object): with open(filename, 'w') as f: f.write('All comics should be downloaded here.') - @classmethod - def getPage(cls, url): + def getPage(self, url): """ Fetch a page and return the opaque repesentation for the data parameter of fetchUrls and fetchText. 
@@ -295,16 +294,13 @@ class Scraper(object): """ raise ValueError("No implementation for getPage!") - @classmethod - def fetchUrls(cls, url, data, urlSearch): + def fetchUrls(self, url, data, urlsearch): raise ValueError("No implementation for fetchUrls!") - @classmethod - def fetchUrl(cls, url, data, urlSearch): - return cls.fetchUrls(url, data, urlSearch)[0] + def fetchUrl(self, url, data, urlsearch): + return self.fetchUrls(url, data, urlsearch)[0] - @classmethod - def fetchText(cls, url, data, textSearch, optional): + def fetchText(self, url, data, textsearch, optional): raise ValueError("No implementation for fetchText!") def getDisabledReasons(self): @@ -351,20 +347,18 @@ class _BasicScraper(Scraper): BASE_SEARCH = re.compile(tagre("base", "href", '([^"]*)')) - @classmethod - def getPage(cls, url): - content = get_page(url, cls.session).text + def getPage(self, url): + content = get_page(url, self.session).text # determine base URL baseUrl = None - match = cls.BASE_SEARCH.search(content) + match = self.BASE_SEARCH.search(content) if match: baseUrl = match.group(1) else: baseUrl = url return (content, baseUrl) - @classmethod - def fetchUrls(cls, url, data, urlSearch): + def fetchUrls(self, url, data, urlSearch): """Search all entries for given URL pattern(s) in a HTML page.""" searchUrls = [] searches = makeSequence(urlSearch) @@ -386,8 +380,7 @@ class _BasicScraper(Scraper): (patterns, url)) return searchUrls - @classmethod - def fetchText(cls, url, data, textSearch, optional): + def fetchText(self, url, data, textSearch, optional): """Search text entry for given text pattern in a HTML page.""" if textSearch: match = textSearch.search(data[0]) @@ -434,31 +427,29 @@ class _ParserScraper(Scraper): # another Python module, XPath is the default for now. 
css = False - @classmethod - def getPage(cls, url): - page = get_page(url, cls.session) + def getPage(self, url): + page = get_page(url, self.session) if page.encoding: # Requests figured out the encoding, so we can deliver Unicode to # LXML. Unfortunatly, LXML feels betrayed if there is still an XML # declaration with (probably wrong!) encoding at the top of the # document. Web browsers ignore such if the encoding was specified # in the HTTP header and so do we. - text = cls.XML_DECL.sub('\1\2', page.text, count=1) + text = self.XML_DECL.sub('\1\2', page.text, count=1) tree = html.document_fromstring(text) else: tree = html.document_fromstring(page.content) tree.make_links_absolute(url) return tree - @classmethod - def fetchUrls(cls, url, data, urlSearch): + def fetchUrls(self, url, data, urlSearch): """Search all entries for given XPath in a HTML page.""" searchUrls = [] - if cls.css: + if self.css: searchFun = data.cssselect else: def searchFun(s): - return data.xpath(s, namespaces=cls.NS) + return data.xpath(s, namespaces=self.NS) searches = makeSequence(urlSearch) for search in searches: for match in searchFun(search): @@ -472,17 +463,16 @@ class _ParserScraper(Scraper): (searchUrl, search)) searchUrls.append(searchUrl) - if not cls.multipleImagesPerStrip and searchUrls: + if not self.multipleImagesPerStrip and searchUrls: # do not search other links if one pattern matched break if not searchUrls: raise ValueError("XPath %s not found at URL %s." % (searches, url)) return searchUrls - @classmethod - def fetchText(cls, url, data, textSearch, optional): + def fetchText(self, url, data, textSearch, optional): """Search text entry for given text XPath in a HTML page.""" - if cls.css: + if self.css: searchFun = data.cssselect else: searchFun = data.xpath