Use default bounceStarter for site modules.

This commit is contained in:
Tobias Gruetzmacher 2016-04-13 01:24:13 +02:00
parent 9028724a74
commit 16004e43e4
3 changed files with 288 additions and 249 deletions

File diff suppressed because it is too large. (Load diff)

View file

@@ -6,26 +6,20 @@
from __future__ import absolute_import, division, print_function from __future__ import absolute_import, division, print_function
from ..scraper import _ParserScraper from ..scraper import _ParserScraper
from ..helpers import bounceStarter
class _WLPComics(_ParserScraper): class _WLPComics(_ParserScraper):
imageSearch = '//center/*/img[contains(@alt, " Comic")]' imageSearch = '//center/*/img[contains(@alt, " Comic")]'
prevSearch = '//a[contains(text(), "Previous ")]' prevSearch = '//a[contains(text(), "Previous ")]'
nextSearch = '//a[contains(text(), "Next ")]' nextSearch = '//a[contains(text(), "Next ")]'
starter = bounceStarter()
help = 'Index format: nnn' help = 'Index format: nnn'
@classmethod @classmethod
def getName(cls): def getName(cls):
return 'WLP/' + cls.__name__ return 'WLP/' + cls.__name__
@classmethod
def starter(cls):
"""Get bounced start URL."""
data = cls.getPage(cls.url)
url2 = cls.fetchUrl(cls.url, data, cls.prevSearch)
data = cls.getPage(url2)
return cls.fetchUrl(url2, data, cls.nextSearch)
@classmethod @classmethod
def namer(cls, image_url, page_url): def namer(cls, image_url, page_url):
return (page_url.rsplit('/', 1)[-1].split('.')[0] + '_' + return (page_url.rsplit('/', 1)[-1].split('.')[0] + '_' +

View file

@@ -31,20 +31,20 @@ json_file = __file__.replace(".py", ".json")
exclude_comics = [ exclude_comics = [
# unsuitable navigation # unsuitable navigation
"AlfdisAndGunnora", "AlfdisAndGunnora",
"AnAmericanNerdinAnimatedTokyo", "AnAmericanNerdInAnimatedTokyo",
"AngryAlien", "AngryAlien",
"BoozerAndStoner", "BoozerAndStoner",
"Bonejangles", "Bonejangles",
"ConradStory", "ConradStory",
"Crossing", "Crossing",
"ChristianHumberReloaded", "ChristianHumberReloaded",
"CorkandBlotto", "CorkAndBlotto",
"Democomix", "Democomix",
"ErraticBeatComics", "ErraticBeatComics",
"EnergyWielders", "EnergyWielders",
"EvilBearorg", "EvilBearorg",
"Fiascos", "Fiascos",
"FateoftheBlueStar", "FateOfTheBlueStar",
"FPK", "FPK",
"Fanartgyle", "Fanartgyle",
"FrigginRandom", "FrigginRandom",
@@ -60,11 +60,11 @@ exclude_comics = [
"LucidsDream", "LucidsDream",
"MadDog", "MadDog",
"Minebreakers", "Minebreakers",
"Moonlightvalley", "MoonlightValley",
"MyImmortalFool", "MyImmortalFool",
"NATO", "NATO",
"NothingFits", "NothingFits",
"OptimisticFishermenandPessimisticFishermen", "OptimisticFishermenAndPessimisticFishermen",
"Old2G", "Old2G",
"NothingFitsArtBlog", "NothingFitsArtBlog",
"OutToLunchTheStingRayWhoreStory", "OutToLunchTheStingRayWhoreStory",
@@ -77,19 +77,19 @@ exclude_comics = [
"Secondpuberty", "Secondpuberty",
"Seconds", "Seconds",
"SlightlyEccentricOrigins", "SlightlyEccentricOrigins",
"StardusttheCat", "StardustTheCat",
"StrangerthanFiction", "StrangerThanFiction",
"TalamakGreatAdventure", "TalamakGreatAdventure",
"TheBattalion", "TheBattalion",
"TheDailyProblem", "TheDailyProblem",
"TheMansionofE", "TheMansionOfE",
"ThePainter", "ThePainter",
"TheSeekers", "TheSeekers",
"TheTrialsofKlahadoftheAbyss", "TheTrialsOfKlahadOfTheAbyss",
"TheStickmen", "TheStickmen",
"ThornsInOurSide", "ThornsInOurSide",
"TopHeavyVeryBustyPinUpsForAdults", "TopHeavyVeryBustyPinUpsForAdults",
"USBUnlimitedsimulatedbody", "USBUnlimitedSimulatedBody",
"TylerHumanRecycler", "TylerHumanRecycler",
"UAF", "UAF",
"WhenPigsFly", "WhenPigsFly",
@@ -99,24 +99,24 @@ exclude_comics = [
"Angst", "Angst",
# images gone # images gone
"BaseballCapsandTiaras", "BaseballCapsAndTiaras",
"CROSSWORLDSNEXUS", "CROSSWORLDSNEXUS",
"Fathead", "Fathead",
"KevinZombie", "KevinZombie",
"KindergardenCrisIs", "KindergardenCrisIs",
"NoSongsForTheDead", "NoSongsForTheDead",
"RequiemShadowbornPariah", "RequiemShadowbornPariah",
"TezzleandZeek", "TezzleAndZeek",
# broken HTML # broken HTML
"CrossingOver", "CrossingOver",
# unique html # unique html
"IKilledtheHero", "IKilledTheHero",
"PowerofPower", "PowerOfPower",
"Schizmatic", "Schizmatic",
"WaketheSleepers", "WakeTheSleepers",
"WeightofEternity", "WeightOfEternity",
] ]