Make auto-update script more flexible.

This commit is contained in:
Tobias Gruetzmacher 2016-05-22 22:55:06 +02:00
parent ca1c32cf09
commit f29472c143
18 changed files with 74 additions and 54 deletions

View file

@ -22,7 +22,7 @@ class _Arcamax(_ParserScraper):
# do not edit anything below since these entries are generated from # do not edit anything below since these entries are generated from
# scripts/update_plugins.sh # scripts/update_plugins.sh
# DO NOT REMOVE # START AUTOUPDATE
# 9ChickweedLane has a duplicate in GoComics/9ChickweedLane # 9ChickweedLane has a duplicate in GoComics/9ChickweedLane
# Agnes has a duplicate in GoComics/Agnes # Agnes has a duplicate in GoComics/Agnes
# AndyCapp has a duplicate in GoComics/AndyCapp # AndyCapp has a duplicate in GoComics/AndyCapp
@ -211,3 +211,4 @@ class TinasGroove(_Arcamax):
class Zits(_Arcamax): class Zits(_Arcamax):
path = 'zits' path = 'zits'
# END AUTOUPDATE

View file

@ -46,7 +46,7 @@ class CFDandyAndCompany(_ComicFury):
# do not edit anything below since these entries are generated from # do not edit anything below since these entries are generated from
# scripts/update_plugins.sh # scripts/update_plugins.sh
# DO NOT REMOVE # START AUTOUPDATE
class CF0Eight(_ComicFury): class CF0Eight(_ComicFury):
@ -4018,3 +4018,4 @@ class CFZeroEffortFantasy(_ComicFury):
class CFZwergElf(_ComicFury): class CFZwergElf(_ComicFury):
sub = 'zwergelf' sub = 'zwergelf'
lang = 'de' lang = 'de'
# END AUTOUPDATE

View file

@ -50,7 +50,7 @@ class ComicGenesis(_BasicScraper):
return [ return [
# do not edit anything below since these entries are generated from # do not edit anything below since these entries are generated from
# scripts/update_plugins.sh # scripts/update_plugins.sh
# DO NOT REMOVE # START AUTOUPDATE
cls('AAAAA', 'aaaaa'), cls('AAAAA', 'aaaaa'),
cls('AdventuresofKiltman', 'kiltman'), cls('AdventuresofKiltman', 'kiltman'),
cls('AmorModerno', 'amormoderno'), cls('AmorModerno', 'amormoderno'),
@ -128,4 +128,5 @@ class ComicGenesis(_BasicScraper):
cls('WebcomicTheWebcomicWebcomicWebcomicWebcomic', 'dannormnsanidey'), cls('WebcomicTheWebcomicWebcomicWebcomicWebcomic', 'dannormnsanidey'),
cls('WhatYouDontSee', 'phantomlady4'), cls('WhatYouDontSee', 'phantomlady4'),
cls('Wierdman', 'asa'), cls('Wierdman', 'asa'),
# END AUTOUPDATE
] ]

View file

@ -86,7 +86,7 @@ class WinnieThePooh(_Creators):
# do not edit anything below since these entries are generated from # do not edit anything below since these entries are generated from
# scripts/update_plugins.sh # scripts/update_plugins.sh
# DO NOT REMOVE # START AUTOUPDATE
# Agnes has a duplicate in gocomics # Agnes has a duplicate in gocomics
# AndyCapp has a duplicate in gocomics # AndyCapp has a duplicate in gocomics
class AndyMarlette(_Creators): class AndyMarlette(_Creators):
@ -163,3 +163,4 @@ class WizardOfIdSpanish(_CreatorsEs):
# WorkingItOut has a duplicate in gocomics # WorkingItOut has a duplicate in gocomics
# ZackHill has a duplicate in gocomics # ZackHill has a duplicate in gocomics
# END AUTOUPDATE

View file

@ -61,7 +61,7 @@ Overrides = {
# do not edit anything below since these entries are generated from scripts/update.sh # do not edit anything below since these entries are generated from scripts/update.sh
# DO NOT REMOVE # START AUTOUPDATE
add('12_Men_Died_Making_This_Strip', '12_Men_Died_Making_This_Strip') add('12_Men_Died_Making_This_Strip', '12_Men_Died_Making_This_Strip')
add('1337_Joe_and_Fellow_Seth', '1337_Joe_and_Fellow_Seth') add('1337_Joe_and_Fellow_Seth', '1337_Joe_and_Fellow_Seth')
add('20_Galaxies', '20_Galaxies') add('20_Galaxies', '20_Galaxies')
@ -1295,3 +1295,4 @@ add('Yamete_Kudasai', 'Yamete_Kudasai')
add('Yaoi_Seth', 'Yaoi_Seth') add('Yaoi_Seth', 'Yaoi_Seth')
add('Yeah_wait_what', 'Yeah_wait_what') add('Yeah_wait_what', 'Yeah_wait_what')
add('Yoshi_Saga', 'Yoshi_Saga') add('Yoshi_Saga', 'Yoshi_Saga')
# END AUTOUPDATE

View file

@ -620,7 +620,7 @@ class GCYouGuysAreMyFriendsTheComic(_GoComics):
# do not edit anything below since these entries are generated from # do not edit anything below since these entries are generated from
# scripts/update_plugins.sh # scripts/update_plugins.sh
# DO NOT REMOVE # START AUTOUPDATE
class GC060(_GoComics): class GC060(_GoComics):
@ -3481,3 +3481,4 @@ class GCZombieHeights(_GoComics):
class GCZootopia(_GoComics): class GCZootopia(_GoComics):
path = 'zootopia' path = 'zootopia'
# END AUTOUPDATE

View file

@ -32,7 +32,7 @@ class KeenSpot(_BasicScraper):
return [ return [
# do not edit anything below since these entries are generated from # do not edit anything below since these entries are generated from
# scripts/update_plugins.sh # scripts/update_plugins.sh
# DO NOT REMOVE # START AUTOUPDATE
cls('27TwentySeven', 'twenty-seven'), cls('27TwentySeven', 'twenty-seven'),
cls('Adventurers', 'adventurers'), cls('Adventurers', 'adventurers'),
cls('AntiheroForHire', 'antihero'), cls('AntiheroForHire', 'antihero'),
@ -76,4 +76,5 @@ class KeenSpot(_BasicScraper):
cls('TheHuntersofSalamanstra', 'salamanstra'), cls('TheHuntersofSalamanstra', 'salamanstra'),
cls('TheLounge', 'thelounge'), cls('TheLounge', 'thelounge'),
cls('WICKEDPOWERED', 'wickedpowered'), cls('WICKEDPOWERED', 'wickedpowered'),
# END AUTOUPDATE
] ]

View file

@ -85,7 +85,7 @@ class _SmackJeeves(_ParserScraper):
# do not edit anything below since these entries are generated from # do not edit anything below since these entries are generated from
# scripts/update_plugins.sh # scripts/update_plugins.sh
# DO NOT REMOVE # START AUTOUPDATE
class SJ20TimesKirby(_SmackJeeves): class SJ20TimesKirby(_SmackJeeves):
@ -2752,3 +2752,4 @@ class SJZeldaTheNewAdventureofLinkIIMajorasMask(_SmackJeeves):
class SJ_A_(_SmackJeeves): class SJ_A_(_SmackJeeves):
sub = 'a-the-stalker' sub = 'a-the-stalker'
# END AUTOUPDATE

View file

@ -8,6 +8,7 @@ from __future__ import absolute_import, division, print_function
from .common import _WordPressScraper, WP_LATEST_SEARCH from .common import _WordPressScraper, WP_LATEST_SEARCH
from ..helpers import indirectStarter from ..helpers import indirectStarter
class _WebcomicFactory(_WordPressScraper): class _WebcomicFactory(_WordPressScraper):
starter = indirectStarter starter = indirectStarter
latestSearch = WP_LATEST_SEARCH latestSearch = WP_LATEST_SEARCH
@ -15,7 +16,7 @@ class _WebcomicFactory(_WordPressScraper):
# do not edit anything below since these entries are generated from # do not edit anything below since these entries are generated from
# scripts/update_plugins.sh # scripts/update_plugins.sh
# DO NOT REMOVE # START AUTOUPDATE
class AsTheMayoTurns(_WebcomicFactory): class AsTheMayoTurns(_WebcomicFactory):
@ -166,3 +167,4 @@ class WeirdBikerTales(_WebcomicFactory):
class WillysSpaceDive(_WebcomicFactory): class WillysSpaceDive(_WebcomicFactory):
url = 'http://www.thewebcomicfactory.com/comic/willys-space-dive/' url = 'http://www.thewebcomicfactory.com/comic/willys-space-dive/'
firstStripUrl = url firstStripUrl = url
# END AUTOUPDATE

View file

@ -35,7 +35,7 @@ class ArcamaxUpdater(ComicListUpdater):
"""Parse all search result pages.""" """Parse all search result pages."""
self.handle_url('http://www.arcamax.com/comics') self.handle_url('http://www.arcamax.com/comics')
def get_classdef(self, name, entry): def get_entry(self, name, entry):
return u"class %s(_Arcamax):\n path = %r" % (name, entry) return u"class %s(_Arcamax):\n path = %r" % (name, entry)

View file

@ -163,7 +163,7 @@ class ComicFuryUpdater(ComicListUpdater):
page += 1 page += 1
print(last_count, file=sys.stderr, end=" ") print(last_count, file=sys.stderr, end=" ")
def get_classdef(self, name, entry): def get_entry(self, name, entry):
url, active, lang = entry url, active, lang = entry
langopt = '' langopt = ''
if lang != "english": if lang != "english":

View file

@ -36,7 +36,7 @@ class CreatorsUpdater(ComicListUpdater):
self.handle_url('https://www.creators.com/categories/comics/all') self.handle_url('https://www.creators.com/categories/comics/all')
self.handle_url('https://www.creators.com/categories/cartoons/all') self.handle_url('https://www.creators.com/categories/cartoons/all')
def get_classdef(self, name, data): def get_entry(self, name, data):
lang = 'Es' if name.lower().endswith('spanish') else '' lang = 'Es' if name.lower().endswith('spanish') else ''
return u"class %s(_Creators%s):\n path = %r" % (name, lang, data) return u"class %s(_Creators%s):\n path = %r" % (name, lang, data)

View file

@ -51,7 +51,7 @@ class GoComicsUpdater(ComicListUpdater):
self.handle_url('http://www.gocomics.com/explore/editorial_list') self.handle_url('http://www.gocomics.com/explore/editorial_list')
self.handle_url('http://www.gocomics.com/explore/sherpa_list') self.handle_url('http://www.gocomics.com/explore/sherpa_list')
def get_classdef(self, name, url): def get_entry(self, name, url):
return u"class GC%s(_GoComics%s):\n path = %r" % ( return u"class GC%s(_GoComics%s):\n path = %r" % (
name, 'Es' if 'espanol/' in url else '', url[1:]) name, 'Es' if 'espanol/' in url else '', url[1:])

View file

@ -1,24 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher
"""Remove all lines after a given marker line."""
from __future__ import absolute_import, division, print_function
import fileinput
import sys
def main(args):
    """Remove all lines after the first marker line, rewriting the file in place.

    ``args[0]`` is the path of the file to rewrite and ``args[1]`` is the
    marker prefix. Every line up to and including the first line that starts
    with the marker is kept, with trailing whitespace stripped; everything
    after that line is dropped. If no line starts with the marker, all lines
    are kept (still with trailing whitespace stripped).
    """
    filename = args[0]
    marker = args[1]
    try:
        # With inplace=1, fileinput redirects sys.stdout into the file, so
        # print() is what writes the kept lines back.
        for line in fileinput.input(filename, inplace=1):
            print(line.rstrip())
            if line.startswith(marker):
                # Stop copying: the remaining lines are never written back.
                break
    finally:
        # Breaking out early leaves the input sequence open; close it so the
        # rewritten file is flushed and sys.stdout is restored right away
        # instead of at interpreter shutdown.
        fileinput.close()


if __name__ == '__main__':
    main(sys.argv[1:])

View file

@ -11,6 +11,11 @@ import sys
import json import json
import codecs import codecs
try:
from os import replace as rename
except ImportError:
from os import rename
import requests import requests
from lxml import html from lxml import html
@ -28,6 +33,9 @@ class ComicListUpdater(object):
dup_templates = () dup_templates = ()
excluded_comics = () excluded_comics = ()
START = "# START AUTOUPDATE"
END = "# END AUTOUPDATE"
def __init__(self, name): def __init__(self, name):
self.json = name.replace(".py", ".json") self.json = name.replace(".py", ".json")
self.session = requests.Session() self.session = requests.Session()
@ -79,22 +87,48 @@ class ComicListUpdater(object):
comic strips.""" comic strips."""
min_comics, filename = args min_comics, filename = args
min_comics = int(min_comics) min_comics = int(min_comics)
with codecs.open(filename, 'a', 'utf-8') as fp: oldf = codecs.open(filename, 'r', 'utf-8')
newf = codecs.open(filename + '.new', 'w', 'utf-8')
with oldf, newf:
indent = self.copy_until_start(oldf, newf)
with codecs.open(self.json, 'rb', 'utf-8') as f: with codecs.open(self.json, 'rb', 'utf-8') as f:
data = json.load(f) data = json.load(f)
for name, entry in sorted(data.items(), key=first_lower): for name, entry in sorted(data.items(), key=first_lower):
if name in self.excluded_comics: self.write_entry(newf, name, entry, min_comics, indent)
continue self.copy_after_end(oldf, newf)
count = entry['count'] rename(filename + '.new', filename)
if count and count < min_comics:
continue def copy_until_start(self, src, dest):
dup = self.find_dups(name) for line in src:
if dup is not None: dest.write(line)
fp.write(u"# %s has a duplicate in %s\n" % (name, dup)) if line.strip().startswith(self.START):
else: return line.find(self.START)
fp.write(u"\n\n%s\n" % raise RuntimeError("can't find start marker!")
self.get_classdef(truncate_name(name),
entry['data'])) def copy_after_end(self, src, dest):
skip = True
for line in src:
if line.strip().startswith(self.END):
skip = False
if not skip:
dest.write(line)
if skip:
raise RuntimeError("can't find end marker!")
def write_entry(self, fp, name, entry, min_comics, indent):
    """Write the generated text for one comic to ``fp``.

    Nothing is written when ``name`` is listed in ``self.excluded_comics``
    or when the entry has a non-zero ``count`` below ``min_comics``.
    Otherwise the output is prefixed with ``indent`` spaces and is either a
    comment naming the duplicate reported by ``self.find_dups`` or the text
    returned by ``self.get_entry``, with every continuation line indented
    by the same amount.
    """
    if name in self.excluded_comics:
        return
    strip_count = entry['count']
    if strip_count and strip_count < min_comics:
        return
    duplicate = self.find_dups(name)
    padding = " " * indent
    fp.write(padding)
    if duplicate is None:
        generated = self.get_entry(truncate_name(name), entry['data'])
        # Re-indent continuation lines so multi-line entries stay aligned.
        fp.write(generated.replace("\n", "\n" + padding) + "\n")
    else:
        fp.write(u"# %s has a duplicate in %s\n" % (name, duplicate))
def find_dups(self, name): def find_dups(self, name):
"""Check if comic name already exists.""" """Check if comic name already exists."""
@ -106,7 +140,8 @@ class ComicListUpdater(object):
return scraperobj.name return scraperobj.name
return None return None
def get_classdef(self, name, data): def get_entry(self, name, data):
"""Return an entry for the module generator."""
raise NotImplementedError raise NotImplementedError
def run(self): def run(self):

View file

@ -162,7 +162,7 @@ class SmackJeevesUpdater(ComicListUpdater):
print(last_count, file=sys.stderr, end=" ") print(last_count, file=sys.stderr, end=" ")
next_url, last_count = self.handle_url(next_url) next_url, last_count = self.handle_url(next_url)
def get_classdef(self, name, data): def get_entry(self, name, data):
sub, top = urlsplit(data[0]).hostname.split('.', 1) sub, top = urlsplit(data[0]).hostname.split('.', 1)
cl = u"class SJ%s(_SmackJeeves):" % name cl = u"class SJ%s(_SmackJeeves):" % name
if top.lower() == "smackjeeves.com": if top.lower() == "smackjeeves.com":

View file

@ -14,6 +14,5 @@ fi
for script in $list; do for script in $list; do
target="${d}/../dosagelib/plugins/${script}.py" target="${d}/../dosagelib/plugins/${script}.py"
echo "Upating $target" echo "Upating $target"
"${d}/removeafter.py" "$target" "# DO NOT REMOVE"
"${d}/${script}.py" $mincomics "$target" "${d}/${script}.py" $mincomics "$target"
done done

View file

@ -39,7 +39,7 @@ class WebComicFactoryUpdater(ComicListUpdater):
comicurl = self.find_first(comicurl) comicurl = self.find_first(comicurl)
self.add_comic(name, comicurl) self.add_comic(name, comicurl)
def get_classdef(self, name, url): def get_entry(self, name, url):
return (u"class %s(_WebcomicFactory):\n url = %r\n" % (name, url) + return (u"class %s(_WebcomicFactory):\n url = %r\n" % (name, url) +
u" firstStripUrl = url") u" firstStripUrl = url")