Random module fixes.

This commit is contained in:
Tobias Gruetzmacher 2017-05-22 00:30:31 +02:00
parent f8def5b9db
commit 42f66c07b0
5 changed files with 26 additions and 36 deletions

View file

@@ -271,6 +271,7 @@ class Removed(Scraper):
cls('GoComics/HanktheSock'), cls('GoComics/HanktheSock'),
cls('GoComics/HarambeeHills'), cls('GoComics/HarambeeHills'),
cls('GoComics/Hbenson7'), cls('GoComics/Hbenson7'),
cls('GoComics/HeadComics'),
cls('GoComics/HIP'), cls('GoComics/HIP'),
cls('GoComics/HolidayDoodles'), cls('GoComics/HolidayDoodles'),
cls('GoComics/HolySchnark'), cls('GoComics/HolySchnark'),
@@ -289,6 +290,7 @@ class Removed(Scraper):
cls('GoComics/LarryvilleBlue'), cls('GoComics/LarryvilleBlue'),
cls('GoComics/Leadbellies'), cls('GoComics/Leadbellies'),
cls('GoComics/LeGooseyLu'), cls('GoComics/LeGooseyLu'),
cls('GoComics/LeighLunaComics'),
cls('GoComics/LIGHTERSIDE'), cls('GoComics/LIGHTERSIDE'),
cls('GoComics/LostInTranslation'), cls('GoComics/LostInTranslation'),
cls('GoComics/Lucan'), cls('GoComics/Lucan'),
@@ -306,6 +308,7 @@ class Removed(Scraper):
cls('GoComics/Mortimer'), cls('GoComics/Mortimer'),
cls('GoComics/MrGigiAndTheSquid'), cls('GoComics/MrGigiAndTheSquid'),
cls('GoComics/MrMorris'), cls('GoComics/MrMorris'),
cls('GoComics/Mulligan'),
cls('GoComics/MyGuardianGrandpa'), cls('GoComics/MyGuardianGrandpa'),
cls('GoComics/NeatStep'), cls('GoComics/NeatStep'),
cls('GoComics/NedAndLarry'), cls('GoComics/NedAndLarry'),
@@ -345,6 +348,7 @@ class Removed(Scraper):
cls('GoComics/SuburbanWilderness'), cls('GoComics/SuburbanWilderness'),
cls('GoComics/SuckerHeadSmack'), cls('GoComics/SuckerHeadSmack'),
cls('GoComics/TeacherInk'), cls('GoComics/TeacherInk'),
cls('GoComics/ThatMonkeyTune'),
cls('GoComics/TheAcerbicCaf'), cls('GoComics/TheAcerbicCaf'),
cls('GoComics/TheAdventuresofTeetyBallerina'), cls('GoComics/TheAdventuresofTeetyBallerina'),
cls('GoComics/TheEdperiment'), cls('GoComics/TheEdperiment'),
@@ -492,6 +496,7 @@ class Removed(Scraper):
cls('ThunderAndLightning'), cls('ThunderAndLightning'),
cls('TinyKittenTeeth'), cls('TinyKittenTeeth'),
cls('TwoTwoOneFour'), cls('TwoTwoOneFour'),
cls('VampireCheerleaders'),
cls('WayfarersMoon'), cls('WayfarersMoon'),
cls('WebcomicsNation/AgnesQuill'), cls('WebcomicsNation/AgnesQuill'),
cls('WebcomicsNation/MyMuse'), cls('WebcomicsNation/MyMuse'),

View file

@@ -5,7 +5,7 @@
from __future__ import absolute_import, division, print_function from __future__ import absolute_import, division, print_function
from re import compile, escape, IGNORECASE from re import compile, escape
from ..scraper import _BasicScraper, _ParserScraper from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import indirectStarter, xpath_class from ..helpers import indirectStarter, xpath_class
@@ -20,13 +20,11 @@ class TheBrads(_ParserScraper):
multipleImagesPerStrip = True multipleImagesPerStrip = True
class TheDevilsPanties(_BasicScraper): class TheDevilsPanties(_WordPressScraper):
url = 'http://thedevilspanties.com/' url = 'http://thedevilspanties.com/'
stripUrl = url + 'archives/%s' stripUrl = url + 'archives/%s'
firstStripUrl = stripUrl % '300' firstStripUrl = stripUrl % '300'
imageSearch = compile(tagre("img", "src", r'(http://origin\.thedevilspanties\.com/comics/[^"]+)')) prevSearch = '//a[%s]' % xpath_class('navi-prev')
prevSearch = compile(tagre("a", "href", r'(/archives/\d+)',
after="Previous"))
help = 'Index format: number' help = 'Index format: number'
@@ -101,12 +99,16 @@ class TheThinHLine(_TumblrScraper):
return super(TheThinHLine, self).getComicStrip(subPage, pageData) return super(TheThinHLine, self).getComicStrip(subPage, pageData)
class TheWhiteboard(_BasicScraper): class TheWhiteboard(_ParserScraper):
BROKEN_PAGE_MIDDLE = compile(r'</body></html><')
url = 'http://www.the-whiteboard.com/' url = 'http://www.the-whiteboard.com/'
stripUrl = url + 'auto%s.html' imageSearch = '//center/img'
imageSearch = compile(r'<img SRC="(autotwb\d{1,4}.+?|autowb\d{1,4}.+?)">', IGNORECASE) prevSearch = '//a[text()="previous"]'
prevSearch = compile(r'&nbsp<a href="(.+?)">previous</a>', IGNORECASE)
help = 'Index format: twb or wb + n wg. twb1000' # Another ugly hack :(
def _parse_page(self, data):
data = self.BROKEN_PAGE_MIDDLE.sub('<', data)
return super(TheWhiteboard, self)._parse_page(data)
class TheWotch(_WordPressScraper): class TheWotch(_WordPressScraper):

View file

@@ -11,15 +11,6 @@ from ..helpers import indirectStarter, xpath_class
from ..util import tagre from ..util import tagre
class VampireCheerleaders(_BasicScraper):
url = 'http://www.vampirecheerleaders.net/'
stripUrl = url + 'strips-vc/%s'
firstStripUrl = stripUrl % 'fang_service'
imageSearch = compile(tagre("img", "src", r'(/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://www\.vampirecheerleaders\.net/strips-vc/[^"]+)', before="cndprev"))
help = 'Index format: name'
class VGCats(_BasicScraper): class VGCats(_BasicScraper):
url = 'http://www.vgcats.com/comics/' url = 'http://www.vgcats.com/comics/'
stripUrl = url + '?strip_id=%s' stripUrl = url + '?strip_id=%s'

View file

@@ -28,23 +28,15 @@ class WastedTalent(_BasicScraper):
help = 'Index format: stripname' help = 'Index format: stripname'
class WebDesignerCOTW(_BasicScraper): class WebDesignerCOTW(_ParserScraper):
url = 'http://www.webdesignerdepot.com/' baseUrl = 'https://www.webdesignerdepot.com/'
rurl = escape(url) url = baseUrl + 'category/comics/'
starter = indirectStarter starter = indirectStarter
stripUrl = url + '%s/' firstStripUrl = baseUrl + '2009/11/comics-of-the-week-1/'
firstStripUrl = stripUrl % '2009/11/comics-of-the-week-1' imageSearch = '//article[%s]//img' % xpath_class('article-content')
imageSearch = (
compile(tagre("img", "src", r'(http://netdna\.webdesignerdepot\.com/uploads/\d+/\d+/\d+s?\.[^"]+)')),
compile(tagre("img", "src", r'(http://netdna\.webdesignerdepot\.com/uploads/\d+/\d+/Christmas\d+\.[^"]+)')),
compile(tagre("img", "src", r'(http://netdna\.webdesignerdepot\.com/uploads/comics\d+[a-z0-9]*/\d+a?\.[^"]+)')),
compile(tagre("img", "src", r'(http://netdna\.webdesignerdepot\.com/uploads/comics/\d+\.[^"]+)')),
)
multipleImagesPerStrip = True multipleImagesPerStrip = True
prevSearch = compile(tagre("link", "href", r"(%s\d+/\d+/[^']+)" % rurl, prevSearch = '//a[span[%s]]' % xpath_class('icon-right-small')
before='prev', quote="'")) latestSearch = '//a[%s]' % xpath_class('anim-link')
latestSearch = compile(tagre("a", "href", r'(%s\d+/\d+/[^"]+/)' % rurl))
help = 'Index format: yyyy/mm/stripname'
def shouldSkipUrl(self, url, data): def shouldSkipUrl(self, url, data):
"""Skip non-comic URLs.""" """Skip non-comic URLs."""

View file

@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam # Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher # Copyright (C) 2015-2017 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function from __future__ import absolute_import, division, print_function
@@ -23,7 +23,7 @@ class ZapComic(_ParserScraper):
class Zapiro(_ParserScraper): class Zapiro(_ParserScraper):
url = 'http://mg.co.za/zapiro/' url = 'http://mg.co.za/zapiro/'
starter = bounceStarter starter = bounceStarter
imageSearch = '//img[%s]' % xpath_class('img-fluid') imageSearch = '//div[@id="cartoon"]/img'
prevSearch = '//a[%s]' % xpath_class('left') prevSearch = '//a[%s]' % xpath_class('left')
nextSearch = '//a[%s]' % xpath_class('right') nextSearch = '//a[%s]' % xpath_class('right')