dosage/dosagelib/plugins/wordpress.py

89 lines
4.1 KiB
Python
Raw Normal View History

2015-05-15 12:15:32 +00:00
# -*- coding: utf-8 -*-
from dosagelib.helpers import indirectStarter
2015-05-15 12:15:32 +00:00
from ..scraper import make_scraper, _ParserScraper
2015-05-20 11:56:49 +00:00
def add(name, url, firstUrl=None, starter=None, lang=None):
    """Create a scraper for a WordPress-style comic and register it here.

    The scraper is built by ``make_scraper`` on top of ``_ParserScraper``
    and stored in this module's globals under ``name``.

    name     -- scraper name; also the key used in the module globals
    url      -- base URL of the comic
    firstUrl -- optional path; when truthy it is appended to ``url`` by
                plain string concatenation and passed as ``firstUrl``
    starter  -- optional value passed through as the ``starter`` attribute
    lang     -- optional value passed through as the ``lang`` attribute
    """
    # XPath expressions handed over as ``imageSearch``.
    image_xpaths = [
        '//div[@id="comic"]//img',
        '//div[@class="webcomic-image"]//img',
    ]
    # XPath expressions handed over as ``prevSearch``: the first two match
    # on the link text, the remaining ones on the link's class attribute.
    prev_xpaths = [
        "//a[contains(concat(' ', text(), ' '), ' Prev ')]",
        "//a[contains(concat(' ', text(), ' '), ' Previous ')]",
        "//a[contains(concat(' ', @class, ' '), ' navi-prev ')]",
        "//a[contains(concat(' ', @class, ' '), ' navi-prev-in ')]",
        "//a[contains(concat(' ', @class, ' '), ' navi-previous ')]",
        "//a[contains(concat(' ', @class, ' '), ' previous-webcomic-link ')]",
    ]

    scraper_attrs = {
        'name': name,
        'url': url,
        'imageSearch': image_xpaths,
        'prevSearch': prev_xpaths,
    }

    # Optional attributes are only set when a truthy value was supplied.
    if lang:
        scraper_attrs['lang'] = lang
    if firstUrl:
        scraper_attrs['firstUrl'] = url + firstUrl
    if starter:
        scraper_attrs['starter'] = starter

    scraper = make_scraper(name, _ParserScraper, **scraper_attrs)
    globals()[name] = scraper
2015-05-15 12:15:32 +00:00
2015-05-25 09:57:06 +00:00
# Individually registered comics, one add() call per scraper.
# NOTE(review): the name below says '1997' but the site is 1977thecomic.com;
# possibly a typo -- confirm before renaming, since this is the scraper name.
add('1997', 'http://1977thecomic.com/')
add('Alice', 'http://www.alicecomics.com/',
    starter=indirectStarter('http://www.alicecomics.com/',
                            '//a[text()="Latest Alice!"]'))
add('Amya', 'http://www.amyachronicles.com/')
add('AxeCop', 'http://axecop.com/comic/season-two/')
add('Bardsworth', 'http://www.bardsworth.com/')
add('BloodBound', 'http://bloodboundcomic.com/',
    firstUrl='comic/06112006/')
add('BratHalla', 'http://brat-halla.com/')
add('BroodHollow', 'http://broodhollow.chainsawsuit.com/',
    firstUrl='page/2012/10/06/book-1-curious-little-thing')
add('Buni', 'http://www.bunicomic.com/')
add('BusinessCat', 'http://www.businesscat.happyjar.com/')
add('Catena', 'http://catenamanor.com/')
add('CatsAndCameras', 'http://catsncameras.com/')
add('CraftedFables', 'http://www.caf-fiends.net/comicpress/')
add('CourtingDisaster', 'http://www.courting-disaster.com/',
    firstUrl='comic/courting-disaster-17/')
add('CowboyJedi', 'http://www.cowboyjedi.com/')
add('FowlLanguage', 'http://www.fowllanguagecomics.com/')
add('HappyJar', 'http://www.happyjar.com/')
add('Hipsters', 'http://www.hipsters-comic.com/',
    firstUrl='comic/hip01/')
add('IDreamOfAJeanieBottle', 'http://jeaniebottle.com/')
add('ItsWalky', 'http://www.itswalky.com/')
add('KatzenfutterGeleespritzer', 'http://www.katzenfuttergeleespritzer.de/',
    firstUrl='comics/gert-grendil/', lang='de')
add('Meek', 'http://www.meekcomic.com/')
add('Meiosis', 'http://meiosiswebcomic.com/')
add('Melonpool', 'http://www.melonpool.com/')
add('MistyTheMouse', 'http://www.mistythemouse.com/')
add('Nedroid', 'http://nedroid.com/')
add('Nicky510', 'http://www.nickyitis.com/')
add('OnTheEdge', 'http://ontheedgecomics.com/',
    firstUrl='comic/ote0001/')
add('PandyLand', 'http://pandyland.net/',
    firstUrl='1/')
add('SailorsunOrg', 'http://sailorsun.org/')
add('Sithrah', 'http://sithrah.com/')
add('SlightlyDamned', 'http://www.sdamned.com/')
add('SPQRBlues', 'http://spqrblues.com/IV/')
add('TheDreamlandChronicles', 'http://www.thedreamlandchronicles.com/')
add('TheGentlemansArmchair', 'http://thegentlemansarmchair.com/')
add('TheWebcomicFactory', 'http://www.thewebcomicfactory.com/')
add('YAFGC', 'http://yafgc.net/')
# All comics hosted on HijiNKS ENSUE: they share one site URL and differ
# only in the XPath given to indirectStarter for that site's front page.
for name, starterXPath in (
    ('HijinksEnsue',
     '//h4[text()="Read The Latest HijiNKS ENSUE"]/..//a'),
    ('HijinksEnsueClassic',
     '//h4[text()="Read HijiNKS ENSUE Classic"]/..//a[3]'),
    ('Faneurysm',
     '//h4[text()="Read The Latest FANEURYSM"]/..//a'),
    ('HijinksEnsueConvention',
     '//h4[text()="Latest Fancy Convention Sketches"]/..//a'),
    ('HijinksEnsuePhoto',
     '//h4[text()="Latest Fancy Photo Comic"]/..//a'),
):
    add(
        name,
        'http://hijinksensue.com/',
        starter=indirectStarter('http://hijinksensue.com/', starterXPath)
    )
# All comics on flowerlarkstudios: each one is selected by the position of
# its link among the anchors inside the "sidebar-left" div.
for name, linkNumber in (
    ('Ashes', 1),
    ('Eryl', 3),
    # DarkWings is a duplicate of the entry above (same link number); the
    # comic was available under this name in previous versions of dosage.
    ('DarkWings', 3),
    ('Laiyu', 5),
    ('NoMoreSavePoints', 7),
    ('EasilyAmused', 9),
):
    add(
        name,
        'http://www.flowerlarkstudios.com/',
        starter=indirectStarter(
            'http://www.flowerlarkstudios.com/',
            '(//div[@id="sidebar-left"]//a)[%d]' % linkNumber)
    )