Make auto-update script more flexible.
This commit is contained in:
parent
ca1c32cf09
commit
f29472c143
18 changed files with 74 additions and 54 deletions
|
@ -22,7 +22,7 @@ class _Arcamax(_ParserScraper):
|
||||||
|
|
||||||
# do not edit anything below since these entries are generated from
|
# do not edit anything below since these entries are generated from
|
||||||
# scripts/update_plugins.sh
|
# scripts/update_plugins.sh
|
||||||
# DO NOT REMOVE
|
# START AUTOUPDATE
|
||||||
# 9ChickweedLane has a duplicate in GoComics/9ChickweedLane
|
# 9ChickweedLane has a duplicate in GoComics/9ChickweedLane
|
||||||
# Agnes has a duplicate in GoComics/Agnes
|
# Agnes has a duplicate in GoComics/Agnes
|
||||||
# AndyCapp has a duplicate in GoComics/AndyCapp
|
# AndyCapp has a duplicate in GoComics/AndyCapp
|
||||||
|
@ -211,3 +211,4 @@ class TinasGroove(_Arcamax):
|
||||||
|
|
||||||
class Zits(_Arcamax):
|
class Zits(_Arcamax):
|
||||||
path = 'zits'
|
path = 'zits'
|
||||||
|
# END AUTOUPDATE
|
||||||
|
|
|
@ -46,7 +46,7 @@ class CFDandyAndCompany(_ComicFury):
|
||||||
|
|
||||||
# do not edit anything below since these entries are generated from
|
# do not edit anything below since these entries are generated from
|
||||||
# scripts/update_plugins.sh
|
# scripts/update_plugins.sh
|
||||||
# DO NOT REMOVE
|
# START AUTOUPDATE
|
||||||
|
|
||||||
|
|
||||||
class CF0Eight(_ComicFury):
|
class CF0Eight(_ComicFury):
|
||||||
|
@ -4018,3 +4018,4 @@ class CFZeroEffortFantasy(_ComicFury):
|
||||||
class CFZwergElf(_ComicFury):
|
class CFZwergElf(_ComicFury):
|
||||||
sub = 'zwergelf'
|
sub = 'zwergelf'
|
||||||
lang = 'de'
|
lang = 'de'
|
||||||
|
# END AUTOUPDATE
|
||||||
|
|
|
@ -50,7 +50,7 @@ class ComicGenesis(_BasicScraper):
|
||||||
return [
|
return [
|
||||||
# do not edit anything below since these entries are generated from
|
# do not edit anything below since these entries are generated from
|
||||||
# scripts/update_plugins.sh
|
# scripts/update_plugins.sh
|
||||||
# DO NOT REMOVE
|
# START AUTOUPDATE
|
||||||
cls('AAAAA', 'aaaaa'),
|
cls('AAAAA', 'aaaaa'),
|
||||||
cls('AdventuresofKiltman', 'kiltman'),
|
cls('AdventuresofKiltman', 'kiltman'),
|
||||||
cls('AmorModerno', 'amormoderno'),
|
cls('AmorModerno', 'amormoderno'),
|
||||||
|
@ -128,4 +128,5 @@ class ComicGenesis(_BasicScraper):
|
||||||
cls('WebcomicTheWebcomicWebcomicWebcomicWebcomic', 'dannormnsanidey'),
|
cls('WebcomicTheWebcomicWebcomicWebcomicWebcomic', 'dannormnsanidey'),
|
||||||
cls('WhatYouDontSee', 'phantomlady4'),
|
cls('WhatYouDontSee', 'phantomlady4'),
|
||||||
cls('Wierdman', 'asa'),
|
cls('Wierdman', 'asa'),
|
||||||
|
# END AUTOUPDATE
|
||||||
]
|
]
|
||||||
|
|
|
@ -86,7 +86,7 @@ class WinnieThePooh(_Creators):
|
||||||
|
|
||||||
# do not edit anything below since these entries are generated from
|
# do not edit anything below since these entries are generated from
|
||||||
# scripts/update_plugins.sh
|
# scripts/update_plugins.sh
|
||||||
# DO NOT REMOVE
|
# START AUTOUPDATE
|
||||||
# Agnes has a duplicate in gocomics
|
# Agnes has a duplicate in gocomics
|
||||||
# AndyCapp has a duplicate in gocomics
|
# AndyCapp has a duplicate in gocomics
|
||||||
class AndyMarlette(_Creators):
|
class AndyMarlette(_Creators):
|
||||||
|
@ -163,3 +163,4 @@ class WizardOfIdSpanish(_CreatorsEs):
|
||||||
|
|
||||||
# WorkingItOut has a duplicate in gocomics
|
# WorkingItOut has a duplicate in gocomics
|
||||||
# ZackHill has a duplicate in gocomics
|
# ZackHill has a duplicate in gocomics
|
||||||
|
# END AUTOUPDATE
|
||||||
|
|
|
@ -61,7 +61,7 @@ Overrides = {
|
||||||
|
|
||||||
|
|
||||||
# do not edit anything below since these entries are generated from scripts/update.sh
|
# do not edit anything below since these entries are generated from scripts/update.sh
|
||||||
# DO NOT REMOVE
|
# START AUTOUPDATE
|
||||||
add('12_Men_Died_Making_This_Strip', '12_Men_Died_Making_This_Strip')
|
add('12_Men_Died_Making_This_Strip', '12_Men_Died_Making_This_Strip')
|
||||||
add('1337_Joe_and_Fellow_Seth', '1337_Joe_and_Fellow_Seth')
|
add('1337_Joe_and_Fellow_Seth', '1337_Joe_and_Fellow_Seth')
|
||||||
add('20_Galaxies', '20_Galaxies')
|
add('20_Galaxies', '20_Galaxies')
|
||||||
|
@ -1295,3 +1295,4 @@ add('Yamete_Kudasai', 'Yamete_Kudasai')
|
||||||
add('Yaoi_Seth', 'Yaoi_Seth')
|
add('Yaoi_Seth', 'Yaoi_Seth')
|
||||||
add('Yeah_wait_what', 'Yeah_wait_what')
|
add('Yeah_wait_what', 'Yeah_wait_what')
|
||||||
add('Yoshi_Saga', 'Yoshi_Saga')
|
add('Yoshi_Saga', 'Yoshi_Saga')
|
||||||
|
# END AUTOUPDATE
|
||||||
|
|
|
@ -620,7 +620,7 @@ class GCYouGuysAreMyFriendsTheComic(_GoComics):
|
||||||
|
|
||||||
# do not edit anything below since these entries are generated from
|
# do not edit anything below since these entries are generated from
|
||||||
# scripts/update_plugins.sh
|
# scripts/update_plugins.sh
|
||||||
# DO NOT REMOVE
|
# START AUTOUPDATE
|
||||||
|
|
||||||
|
|
||||||
class GC060(_GoComics):
|
class GC060(_GoComics):
|
||||||
|
@ -3481,3 +3481,4 @@ class GCZombieHeights(_GoComics):
|
||||||
|
|
||||||
class GCZootopia(_GoComics):
|
class GCZootopia(_GoComics):
|
||||||
path = 'zootopia'
|
path = 'zootopia'
|
||||||
|
# END AUTOUPDATE
|
||||||
|
|
|
@ -32,7 +32,7 @@ class KeenSpot(_BasicScraper):
|
||||||
return [
|
return [
|
||||||
# do not edit anything below since these entries are generated from
|
# do not edit anything below since these entries are generated from
|
||||||
# scripts/update_plugins.sh
|
# scripts/update_plugins.sh
|
||||||
# DO NOT REMOVE
|
# START AUTOUPDATE
|
||||||
cls('27TwentySeven', 'twenty-seven'),
|
cls('27TwentySeven', 'twenty-seven'),
|
||||||
cls('Adventurers', 'adventurers'),
|
cls('Adventurers', 'adventurers'),
|
||||||
cls('AntiheroForHire', 'antihero'),
|
cls('AntiheroForHire', 'antihero'),
|
||||||
|
@ -76,4 +76,5 @@ class KeenSpot(_BasicScraper):
|
||||||
cls('TheHuntersofSalamanstra', 'salamanstra'),
|
cls('TheHuntersofSalamanstra', 'salamanstra'),
|
||||||
cls('TheLounge', 'thelounge'),
|
cls('TheLounge', 'thelounge'),
|
||||||
cls('WICKEDPOWERED', 'wickedpowered'),
|
cls('WICKEDPOWERED', 'wickedpowered'),
|
||||||
|
# END AUTOUPDATE
|
||||||
]
|
]
|
||||||
|
|
|
@ -85,7 +85,7 @@ class _SmackJeeves(_ParserScraper):
|
||||||
|
|
||||||
# do not edit anything below since these entries are generated from
|
# do not edit anything below since these entries are generated from
|
||||||
# scripts/update_plugins.sh
|
# scripts/update_plugins.sh
|
||||||
# DO NOT REMOVE
|
# START AUTOUPDATE
|
||||||
|
|
||||||
|
|
||||||
class SJ20TimesKirby(_SmackJeeves):
|
class SJ20TimesKirby(_SmackJeeves):
|
||||||
|
@ -2752,3 +2752,4 @@ class SJZeldaTheNewAdventureofLinkIIMajorasMask(_SmackJeeves):
|
||||||
|
|
||||||
class SJ_A_(_SmackJeeves):
|
class SJ_A_(_SmackJeeves):
|
||||||
sub = 'a-the-stalker'
|
sub = 'a-the-stalker'
|
||||||
|
# END AUTOUPDATE
|
||||||
|
|
|
@ -8,6 +8,7 @@ from __future__ import absolute_import, division, print_function
|
||||||
from .common import _WordPressScraper, WP_LATEST_SEARCH
|
from .common import _WordPressScraper, WP_LATEST_SEARCH
|
||||||
from ..helpers import indirectStarter
|
from ..helpers import indirectStarter
|
||||||
|
|
||||||
|
|
||||||
class _WebcomicFactory(_WordPressScraper):
|
class _WebcomicFactory(_WordPressScraper):
|
||||||
starter = indirectStarter
|
starter = indirectStarter
|
||||||
latestSearch = WP_LATEST_SEARCH
|
latestSearch = WP_LATEST_SEARCH
|
||||||
|
@ -15,7 +16,7 @@ class _WebcomicFactory(_WordPressScraper):
|
||||||
|
|
||||||
# do not edit anything below since these entries are generated from
|
# do not edit anything below since these entries are generated from
|
||||||
# scripts/update_plugins.sh
|
# scripts/update_plugins.sh
|
||||||
# DO NOT REMOVE
|
# START AUTOUPDATE
|
||||||
|
|
||||||
|
|
||||||
class AsTheMayoTurns(_WebcomicFactory):
|
class AsTheMayoTurns(_WebcomicFactory):
|
||||||
|
@ -166,3 +167,4 @@ class WeirdBikerTales(_WebcomicFactory):
|
||||||
class WillysSpaceDive(_WebcomicFactory):
|
class WillysSpaceDive(_WebcomicFactory):
|
||||||
url = 'http://www.thewebcomicfactory.com/comic/willys-space-dive/'
|
url = 'http://www.thewebcomicfactory.com/comic/willys-space-dive/'
|
||||||
firstStripUrl = url
|
firstStripUrl = url
|
||||||
|
# END AUTOUPDATE
|
||||||
|
|
|
@ -35,7 +35,7 @@ class ArcamaxUpdater(ComicListUpdater):
|
||||||
"""Parse all search result pages."""
|
"""Parse all search result pages."""
|
||||||
self.handle_url('http://www.arcamax.com/comics')
|
self.handle_url('http://www.arcamax.com/comics')
|
||||||
|
|
||||||
def get_classdef(self, name, entry):
|
def get_entry(self, name, entry):
|
||||||
return u"class %s(_Arcamax):\n path = %r" % (name, entry)
|
return u"class %s(_Arcamax):\n path = %r" % (name, entry)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -163,7 +163,7 @@ class ComicFuryUpdater(ComicListUpdater):
|
||||||
page += 1
|
page += 1
|
||||||
print(last_count, file=sys.stderr, end=" ")
|
print(last_count, file=sys.stderr, end=" ")
|
||||||
|
|
||||||
def get_classdef(self, name, entry):
|
def get_entry(self, name, entry):
|
||||||
url, active, lang = entry
|
url, active, lang = entry
|
||||||
langopt = ''
|
langopt = ''
|
||||||
if lang != "english":
|
if lang != "english":
|
||||||
|
|
|
@ -36,7 +36,7 @@ class CreatorsUpdater(ComicListUpdater):
|
||||||
self.handle_url('https://www.creators.com/categories/comics/all')
|
self.handle_url('https://www.creators.com/categories/comics/all')
|
||||||
self.handle_url('https://www.creators.com/categories/cartoons/all')
|
self.handle_url('https://www.creators.com/categories/cartoons/all')
|
||||||
|
|
||||||
def get_classdef(self, name, data):
|
def get_entry(self, name, data):
|
||||||
lang = 'Es' if name.lower().endswith('spanish') else ''
|
lang = 'Es' if name.lower().endswith('spanish') else ''
|
||||||
return u"class %s(_Creators%s):\n path = %r" % (name, lang, data)
|
return u"class %s(_Creators%s):\n path = %r" % (name, lang, data)
|
||||||
|
|
||||||
|
|
|
@ -51,7 +51,7 @@ class GoComicsUpdater(ComicListUpdater):
|
||||||
self.handle_url('http://www.gocomics.com/explore/editorial_list')
|
self.handle_url('http://www.gocomics.com/explore/editorial_list')
|
||||||
self.handle_url('http://www.gocomics.com/explore/sherpa_list')
|
self.handle_url('http://www.gocomics.com/explore/sherpa_list')
|
||||||
|
|
||||||
def get_classdef(self, name, url):
|
def get_entry(self, name, url):
|
||||||
return u"class GC%s(_GoComics%s):\n path = %r" % (
|
return u"class GC%s(_GoComics%s):\n path = %r" % (
|
||||||
name, 'Es' if 'espanol/' in url else '', url[1:])
|
name, 'Es' if 'espanol/' in url else '', url[1:])
|
||||||
|
|
||||||
|
|
|
@ -1,24 +0,0 @@
|
||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
|
||||||
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
|
||||||
"""Remove all lines after a given marker line."""
|
|
||||||
from __future__ import absolute_import, division, print_function
|
|
||||||
|
|
||||||
import fileinput
|
|
||||||
import sys
|
|
||||||
|
|
||||||
|
|
||||||
def main(args):
|
|
||||||
"""Remove lines after marker."""
|
|
||||||
filename = args[0]
|
|
||||||
marker = args[1]
|
|
||||||
for line in fileinput.input(filename, inplace=1):
|
|
||||||
print(line.rstrip())
|
|
||||||
if line.startswith(marker):
|
|
||||||
break
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
main(sys.argv[1:])
|
|
|
@ -11,6 +11,11 @@ import sys
|
||||||
import json
|
import json
|
||||||
import codecs
|
import codecs
|
||||||
|
|
||||||
|
try:
|
||||||
|
from os import replace as rename
|
||||||
|
except ImportError:
|
||||||
|
from os import rename
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
from lxml import html
|
from lxml import html
|
||||||
|
|
||||||
|
@ -28,6 +33,9 @@ class ComicListUpdater(object):
|
||||||
dup_templates = ()
|
dup_templates = ()
|
||||||
excluded_comics = ()
|
excluded_comics = ()
|
||||||
|
|
||||||
|
START = "# START AUTOUPDATE"
|
||||||
|
END = "# END AUTOUPDATE"
|
||||||
|
|
||||||
def __init__(self, name):
|
def __init__(self, name):
|
||||||
self.json = name.replace(".py", ".json")
|
self.json = name.replace(".py", ".json")
|
||||||
self.session = requests.Session()
|
self.session = requests.Session()
|
||||||
|
@ -79,22 +87,48 @@ class ComicListUpdater(object):
|
||||||
comic strips."""
|
comic strips."""
|
||||||
min_comics, filename = args
|
min_comics, filename = args
|
||||||
min_comics = int(min_comics)
|
min_comics = int(min_comics)
|
||||||
with codecs.open(filename, 'a', 'utf-8') as fp:
|
oldf = codecs.open(filename, 'r', 'utf-8')
|
||||||
|
newf = codecs.open(filename + '.new', 'w', 'utf-8')
|
||||||
|
with oldf, newf:
|
||||||
|
indent = self.copy_until_start(oldf, newf)
|
||||||
with codecs.open(self.json, 'rb', 'utf-8') as f:
|
with codecs.open(self.json, 'rb', 'utf-8') as f:
|
||||||
data = json.load(f)
|
data = json.load(f)
|
||||||
for name, entry in sorted(data.items(), key=first_lower):
|
for name, entry in sorted(data.items(), key=first_lower):
|
||||||
if name in self.excluded_comics:
|
self.write_entry(newf, name, entry, min_comics, indent)
|
||||||
continue
|
self.copy_after_end(oldf, newf)
|
||||||
count = entry['count']
|
rename(filename + '.new', filename)
|
||||||
if count and count < min_comics:
|
|
||||||
continue
|
def copy_until_start(self, src, dest):
|
||||||
dup = self.find_dups(name)
|
for line in src:
|
||||||
if dup is not None:
|
dest.write(line)
|
||||||
fp.write(u"# %s has a duplicate in %s\n" % (name, dup))
|
if line.strip().startswith(self.START):
|
||||||
else:
|
return line.find(self.START)
|
||||||
fp.write(u"\n\n%s\n" %
|
raise RuntimeError("can't find start marker!")
|
||||||
self.get_classdef(truncate_name(name),
|
|
||||||
entry['data']))
|
def copy_after_end(self, src, dest):
|
||||||
|
skip = True
|
||||||
|
for line in src:
|
||||||
|
if line.strip().startswith(self.END):
|
||||||
|
skip = False
|
||||||
|
if not skip:
|
||||||
|
dest.write(line)
|
||||||
|
if skip:
|
||||||
|
raise RuntimeError("can't find end marker!")
|
||||||
|
|
||||||
|
def write_entry(self, fp, name, entry, min_comics, indent):
|
||||||
|
if name in self.excluded_comics:
|
||||||
|
return
|
||||||
|
count = entry['count']
|
||||||
|
if count and count < min_comics:
|
||||||
|
return
|
||||||
|
dup = self.find_dups(name)
|
||||||
|
fp.write(" " * indent)
|
||||||
|
if dup is not None:
|
||||||
|
fp.write(u"# %s has a duplicate in %s\n" % (name, dup))
|
||||||
|
else:
|
||||||
|
fp.write(self.get_entry(
|
||||||
|
truncate_name(name),
|
||||||
|
entry['data']).replace("\n", "\n" + (" " * indent)) + "\n")
|
||||||
|
|
||||||
def find_dups(self, name):
|
def find_dups(self, name):
|
||||||
"""Check if comic name already exists."""
|
"""Check if comic name already exists."""
|
||||||
|
@ -106,7 +140,8 @@ class ComicListUpdater(object):
|
||||||
return scraperobj.name
|
return scraperobj.name
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def get_classdef(self, name, data):
|
def get_entry(self, name, data):
|
||||||
|
"""Return an entry for the module generator."""
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
|
|
|
@ -162,7 +162,7 @@ class SmackJeevesUpdater(ComicListUpdater):
|
||||||
print(last_count, file=sys.stderr, end=" ")
|
print(last_count, file=sys.stderr, end=" ")
|
||||||
next_url, last_count = self.handle_url(next_url)
|
next_url, last_count = self.handle_url(next_url)
|
||||||
|
|
||||||
def get_classdef(self, name, data):
|
def get_entry(self, name, data):
|
||||||
sub, top = urlsplit(data[0]).hostname.split('.', 1)
|
sub, top = urlsplit(data[0]).hostname.split('.', 1)
|
||||||
cl = u"class SJ%s(_SmackJeeves):" % name
|
cl = u"class SJ%s(_SmackJeeves):" % name
|
||||||
if top.lower() == "smackjeeves.com":
|
if top.lower() == "smackjeeves.com":
|
||||||
|
|
|
@ -14,6 +14,5 @@ fi
|
||||||
for script in $list; do
|
for script in $list; do
|
||||||
target="${d}/../dosagelib/plugins/${script}.py"
|
target="${d}/../dosagelib/plugins/${script}.py"
|
||||||
echo "Upating $target"
|
echo "Upating $target"
|
||||||
"${d}/removeafter.py" "$target" "# DO NOT REMOVE"
|
|
||||||
"${d}/${script}.py" $mincomics "$target"
|
"${d}/${script}.py" $mincomics "$target"
|
||||||
done
|
done
|
||||||
|
|
|
@ -39,7 +39,7 @@ class WebComicFactoryUpdater(ComicListUpdater):
|
||||||
comicurl = self.find_first(comicurl)
|
comicurl = self.find_first(comicurl)
|
||||||
self.add_comic(name, comicurl)
|
self.add_comic(name, comicurl)
|
||||||
|
|
||||||
def get_classdef(self, name, url):
|
def get_entry(self, name, url):
|
||||||
return (u"class %s(_WebcomicFactory):\n url = %r\n" % (name, url) +
|
return (u"class %s(_WebcomicFactory):\n url = %r\n" % (name, url) +
|
||||||
u" firstStripUrl = url")
|
u" firstStripUrl = url")
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue