Make auto-update script more flexible.

This commit is contained in:
Tobias Gruetzmacher 2016-05-22 22:55:06 +02:00
parent ca1c32cf09
commit f29472c143
18 changed files with 74 additions and 54 deletions

View file

@ -22,7 +22,7 @@ class _Arcamax(_ParserScraper):
# do not edit anything below since these entries are generated from
# scripts/update_plugins.sh
# DO NOT REMOVE
# START AUTOUPDATE
# 9ChickweedLane has a duplicate in GoComics/9ChickweedLane
# Agnes has a duplicate in GoComics/Agnes
# AndyCapp has a duplicate in GoComics/AndyCapp
@ -211,3 +211,4 @@ class TinasGroove(_Arcamax):
class Zits(_Arcamax):
path = 'zits'
# END AUTOUPDATE

View file

@ -46,7 +46,7 @@ class CFDandyAndCompany(_ComicFury):
# do not edit anything below since these entries are generated from
# scripts/update_plugins.sh
# DO NOT REMOVE
# START AUTOUPDATE
class CF0Eight(_ComicFury):
@ -4018,3 +4018,4 @@ class CFZeroEffortFantasy(_ComicFury):
class CFZwergElf(_ComicFury):
sub = 'zwergelf'
lang = 'de'
# END AUTOUPDATE

View file

@ -50,7 +50,7 @@ class ComicGenesis(_BasicScraper):
return [
# do not edit anything below since these entries are generated from
# scripts/update_plugins.sh
# DO NOT REMOVE
# START AUTOUPDATE
cls('AAAAA', 'aaaaa'),
cls('AdventuresofKiltman', 'kiltman'),
cls('AmorModerno', 'amormoderno'),
@ -128,4 +128,5 @@ class ComicGenesis(_BasicScraper):
cls('WebcomicTheWebcomicWebcomicWebcomicWebcomic', 'dannormnsanidey'),
cls('WhatYouDontSee', 'phantomlady4'),
cls('Wierdman', 'asa'),
# END AUTOUPDATE
]

View file

@ -86,7 +86,7 @@ class WinnieThePooh(_Creators):
# do not edit anything below since these entries are generated from
# scripts/update_plugins.sh
# DO NOT REMOVE
# START AUTOUPDATE
# Agnes has a duplicate in gocomics
# AndyCapp has a duplicate in gocomics
class AndyMarlette(_Creators):
@ -163,3 +163,4 @@ class WizardOfIdSpanish(_CreatorsEs):
# WorkingItOut has a duplicate in gocomics
# ZackHill has a duplicate in gocomics
# END AUTOUPDATE

View file

@ -61,7 +61,7 @@ Overrides = {
# do not edit anything below since these entries are generated from scripts/update.sh
# DO NOT REMOVE
# START AUTOUPDATE
add('12_Men_Died_Making_This_Strip', '12_Men_Died_Making_This_Strip')
add('1337_Joe_and_Fellow_Seth', '1337_Joe_and_Fellow_Seth')
add('20_Galaxies', '20_Galaxies')
@ -1295,3 +1295,4 @@ add('Yamete_Kudasai', 'Yamete_Kudasai')
add('Yaoi_Seth', 'Yaoi_Seth')
add('Yeah_wait_what', 'Yeah_wait_what')
add('Yoshi_Saga', 'Yoshi_Saga')
# END AUTOUPDATE

View file

@ -620,7 +620,7 @@ class GCYouGuysAreMyFriendsTheComic(_GoComics):
# do not edit anything below since these entries are generated from
# scripts/update_plugins.sh
# DO NOT REMOVE
# START AUTOUPDATE
class GC060(_GoComics):
@ -3481,3 +3481,4 @@ class GCZombieHeights(_GoComics):
class GCZootopia(_GoComics):
path = 'zootopia'
# END AUTOUPDATE

View file

@ -32,7 +32,7 @@ class KeenSpot(_BasicScraper):
return [
# do not edit anything below since these entries are generated from
# scripts/update_plugins.sh
# DO NOT REMOVE
# START AUTOUPDATE
cls('27TwentySeven', 'twenty-seven'),
cls('Adventurers', 'adventurers'),
cls('AntiheroForHire', 'antihero'),
@ -76,4 +76,5 @@ class KeenSpot(_BasicScraper):
cls('TheHuntersofSalamanstra', 'salamanstra'),
cls('TheLounge', 'thelounge'),
cls('WICKEDPOWERED', 'wickedpowered'),
# END AUTOUPDATE
]

View file

@ -85,7 +85,7 @@ class _SmackJeeves(_ParserScraper):
# do not edit anything below since these entries are generated from
# scripts/update_plugins.sh
# DO NOT REMOVE
# START AUTOUPDATE
class SJ20TimesKirby(_SmackJeeves):
@ -2752,3 +2752,4 @@ class SJZeldaTheNewAdventureofLinkIIMajorasMask(_SmackJeeves):
class SJ_A_(_SmackJeeves):
sub = 'a-the-stalker'
# END AUTOUPDATE

View file

@ -8,6 +8,7 @@ from __future__ import absolute_import, division, print_function
from .common import _WordPressScraper, WP_LATEST_SEARCH
from ..helpers import indirectStarter
class _WebcomicFactory(_WordPressScraper):
starter = indirectStarter
latestSearch = WP_LATEST_SEARCH
@ -15,7 +16,7 @@ class _WebcomicFactory(_WordPressScraper):
# do not edit anything below since these entries are generated from
# scripts/update_plugins.sh
# DO NOT REMOVE
# START AUTOUPDATE
class AsTheMayoTurns(_WebcomicFactory):
@ -166,3 +167,4 @@ class WeirdBikerTales(_WebcomicFactory):
class WillysSpaceDive(_WebcomicFactory):
url = 'http://www.thewebcomicfactory.com/comic/willys-space-dive/'
firstStripUrl = url
# END AUTOUPDATE

View file

@ -35,7 +35,7 @@ class ArcamaxUpdater(ComicListUpdater):
"""Parse all search result pages."""
self.handle_url('http://www.arcamax.com/comics')
def get_classdef(self, name, entry):
def get_entry(self, name, entry):
    """Return the generated class source for a single Arcamax comic.

    The text declares a class called ``name`` deriving from ``_Arcamax``
    whose ``path`` attribute is the repr of ``entry``.
    """
    template = u"class %s(_Arcamax):\n path = %r"
    return template % (name, entry)

View file

@ -163,7 +163,7 @@ class ComicFuryUpdater(ComicListUpdater):
page += 1
print(last_count, file=sys.stderr, end=" ")
def get_classdef(self, name, entry):
def get_entry(self, name, entry):
url, active, lang = entry
langopt = ''
if lang != "english":

View file

@ -36,7 +36,7 @@ class CreatorsUpdater(ComicListUpdater):
self.handle_url('https://www.creators.com/categories/comics/all')
self.handle_url('https://www.creators.com/categories/cartoons/all')
def get_classdef(self, name, data):
def get_entry(self, name, data):
    """Return the generated class source for a single Creators comic.

    Names ending in "spanish" (compared case-insensitively) derive from
    ``_CreatorsEs``; every other name derives from ``_Creators``.  The
    ``path`` attribute is the repr of ``data``.
    """
    if name.lower().endswith('spanish'):
        suffix = 'Es'
    else:
        suffix = ''
    template = u"class %s(_Creators%s):\n path = %r"
    return template % (name, suffix, data)

View file

@ -51,7 +51,7 @@ class GoComicsUpdater(ComicListUpdater):
self.handle_url('http://www.gocomics.com/explore/editorial_list')
self.handle_url('http://www.gocomics.com/explore/sherpa_list')
def get_classdef(self, name, url):
def get_entry(self, name, url):
    """Return the generated class source for a single GoComics comic.

    The class is named ``GC`` + ``name``.  URLs containing ``espanol/``
    derive from ``_GoComicsEs``, all others from ``_GoComics``.  The
    ``path`` attribute is ``url`` without its first character.
    """
    if 'espanol/' in url:
        base_suffix = 'Es'
    else:
        base_suffix = ''
    path = url[1:]
    template = u"class GC%s(_GoComics%s):\n path = %r"
    return template % (name, base_suffix, path)

View file

@ -1,24 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher
"""Remove all lines after a given marker line."""
from __future__ import absolute_import, division, print_function
import fileinput
import sys
def main(args):
    """Remove lines after marker.

    Rewrites the file named by ``args[0]`` in place.  Every line up to and
    including the first line that starts with the marker ``args[1]`` is
    kept (with trailing whitespace stripped); everything after that line is
    dropped.  When no line starts with the marker, all lines are kept.
    """
    filename = args[0]
    marker = args[1]
    lines = fileinput.input(filename, inplace=1)
    try:
        for line in lines:
            # In in-place mode stdout is redirected into the file.
            print(line.rstrip())
            if line.startswith(marker):
                break
    finally:
        # Leaving the loop early would otherwise keep the stream open:
        # stdout stays redirected into the file, the output is not flushed
        # and a later fileinput.input() call fails as "already active".
        lines.close()
if __name__ == '__main__':
main(sys.argv[1:])

View file

@ -11,6 +11,11 @@ import sys
import json
import codecs
try:
from os import replace as rename
except ImportError:
from os import rename
import requests
from lxml import html
@ -28,6 +33,9 @@ class ComicListUpdater(object):
dup_templates = ()
excluded_comics = ()
START = "# START AUTOUPDATE"
END = "# END AUTOUPDATE"
def __init__(self, name):
self.json = name.replace(".py", ".json")
self.session = requests.Session()
@ -79,22 +87,48 @@ class ComicListUpdater(object):
comic strips."""
min_comics, filename = args
min_comics = int(min_comics)
with codecs.open(filename, 'a', 'utf-8') as fp:
oldf = codecs.open(filename, 'r', 'utf-8')
newf = codecs.open(filename + '.new', 'w', 'utf-8')
with oldf, newf:
indent = self.copy_until_start(oldf, newf)
with codecs.open(self.json, 'rb', 'utf-8') as f:
data = json.load(f)
for name, entry in sorted(data.items(), key=first_lower):
if name in self.excluded_comics:
continue
count = entry['count']
if count and count < min_comics:
continue
dup = self.find_dups(name)
if dup is not None:
fp.write(u"# %s has a duplicate in %s\n" % (name, dup))
else:
fp.write(u"\n\n%s\n" %
self.get_classdef(truncate_name(name),
entry['data']))
self.write_entry(newf, name, entry, min_comics, indent)
self.copy_after_end(oldf, newf)
rename(filename + '.new', filename)
def copy_until_start(self, src, dest):
    """Copy lines from src to dest up to and including the start marker.

    Returns the column at which ``self.START`` begins on the marker line.
    Raises RuntimeError when src runs out before a marker line is seen.
    """
    marker = self.START
    for text in src:
        dest.write(text)
        if not text.strip().startswith(marker):
            continue
        return text.find(marker)
    raise RuntimeError("can't find start marker!")
def copy_after_end(self, src, dest):
    """Copy the end-marker line and everything after it from src to dest.

    Lines read before the first line starting with ``self.END`` (ignoring
    surrounding whitespace) are discarded.  Raises RuntimeError when src
    contains no such line.
    """
    found = False
    for text in src:
        found = found or text.strip().startswith(self.END)
        if found:
            dest.write(text)
    if not found:
        raise RuntimeError("can't find end marker!")
def write_entry(self, fp, name, entry, min_comics, indent):
    """Write the generated text for one comic to fp.

    Nothing is written when ``name`` is in ``self.excluded_comics`` or when
    the entry has a non-zero ``count`` below ``min_comics``.  Otherwise the
    output is either a comment naming the duplicate reported by
    ``self.find_dups`` or the text from ``self.get_entry``; every line of
    it is prefixed with ``indent`` spaces.
    """
    if name in self.excluded_comics:
        return
    count = entry['count']
    too_few = count and count < min_comics
    if too_few:
        return
    dup = self.find_dups(name)
    pad = " " * indent
    fp.write(pad)
    if dup is None:
        text = self.get_entry(truncate_name(name), entry['data'])
        # Re-indent continuation lines of multi-line entries as well.
        fp.write(text.replace("\n", "\n" + pad) + "\n")
    else:
        fp.write(u"# %s has a duplicate in %s\n" % (name, dup))
def find_dups(self, name):
"""Check if comic name already exists."""
@ -106,7 +140,8 @@ class ComicListUpdater(object):
return scraperobj.name
return None
def get_classdef(self, name, data):
def get_entry(self, name, data):
    """Return an entry for the module generator.

    This base implementation always raises NotImplementedError.
    """
    raise NotImplementedError()
def run(self):

View file

@ -162,7 +162,7 @@ class SmackJeevesUpdater(ComicListUpdater):
print(last_count, file=sys.stderr, end=" ")
next_url, last_count = self.handle_url(next_url)
def get_classdef(self, name, data):
def get_entry(self, name, data):
sub, top = urlsplit(data[0]).hostname.split('.', 1)
cl = u"class SJ%s(_SmackJeeves):" % name
if top.lower() == "smackjeeves.com":

View file

@ -14,6 +14,5 @@ fi
# Run the matching updater script for every name in $list against its
# plugin module under dosagelib/plugins.
for script in $list; do
  target="${d}/../dosagelib/plugins/${script}.py"
  echo "Updating $target"
  "${d}/removeafter.py" "$target" "# DO NOT REMOVE"
  "${d}/${script}.py" $mincomics "$target"
done

View file

@ -39,7 +39,7 @@ class WebComicFactoryUpdater(ComicListUpdater):
comicurl = self.find_first(comicurl)
self.add_comic(name, comicurl)
def get_classdef(self, name, url):
def get_entry(self, name, url):
    """Return the generated class source for a single Webcomic Factory comic.

    The class derives from ``_WebcomicFactory``; its ``url`` attribute is
    the repr of ``url`` and ``firstStripUrl`` is set to that same ``url``
    attribute.
    """
    parts = (
        u"class %s(_WebcomicFactory):\n url = %r\n" % (name, url),
        u" firstStripUrl = url",
    )
    return u"".join(parts)