Make auto-update script more flexible.
This commit is contained in:
parent
ca1c32cf09
commit
f29472c143
18 changed files with 74 additions and 54 deletions
|
@ -22,7 +22,7 @@ class _Arcamax(_ParserScraper):
|
|||
|
||||
# do not edit anything below since these entries are generated from
|
||||
# scripts/update_plugins.sh
|
||||
# DO NOT REMOVE
|
||||
# START AUTOUPDATE
|
||||
# 9ChickweedLane has a duplicate in GoComics/9ChickweedLane
|
||||
# Agnes has a duplicate in GoComics/Agnes
|
||||
# AndyCapp has a duplicate in GoComics/AndyCapp
|
||||
|
@ -211,3 +211,4 @@ class TinasGroove(_Arcamax):
|
|||
|
||||
class Zits(_Arcamax):
|
||||
path = 'zits'
|
||||
# END AUTOUPDATE
|
||||
|
|
|
@ -46,7 +46,7 @@ class CFDandyAndCompany(_ComicFury):
|
|||
|
||||
# do not edit anything below since these entries are generated from
|
||||
# scripts/update_plugins.sh
|
||||
# DO NOT REMOVE
|
||||
# START AUTOUPDATE
|
||||
|
||||
|
||||
class CF0Eight(_ComicFury):
|
||||
|
@ -4018,3 +4018,4 @@ class CFZeroEffortFantasy(_ComicFury):
|
|||
class CFZwergElf(_ComicFury):
|
||||
sub = 'zwergelf'
|
||||
lang = 'de'
|
||||
# END AUTOUPDATE
|
||||
|
|
|
@ -50,7 +50,7 @@ class ComicGenesis(_BasicScraper):
|
|||
return [
|
||||
# do not edit anything below since these entries are generated from
|
||||
# scripts/update_plugins.sh
|
||||
# DO NOT REMOVE
|
||||
# START AUTOUPDATE
|
||||
cls('AAAAA', 'aaaaa'),
|
||||
cls('AdventuresofKiltman', 'kiltman'),
|
||||
cls('AmorModerno', 'amormoderno'),
|
||||
|
@ -128,4 +128,5 @@ class ComicGenesis(_BasicScraper):
|
|||
cls('WebcomicTheWebcomicWebcomicWebcomicWebcomic', 'dannormnsanidey'),
|
||||
cls('WhatYouDontSee', 'phantomlady4'),
|
||||
cls('Wierdman', 'asa'),
|
||||
# END AUTOUPDATE
|
||||
]
|
||||
|
|
|
@ -86,7 +86,7 @@ class WinnieThePooh(_Creators):
|
|||
|
||||
# do not edit anything below since these entries are generated from
|
||||
# scripts/update_plugins.sh
|
||||
# DO NOT REMOVE
|
||||
# START AUTOUPDATE
|
||||
# Agnes has a duplicate in gocomics
|
||||
# AndyCapp has a duplicate in gocomics
|
||||
class AndyMarlette(_Creators):
|
||||
|
@ -163,3 +163,4 @@ class WizardOfIdSpanish(_CreatorsEs):
|
|||
|
||||
# WorkingItOut has a duplicate in gocomics
|
||||
# ZackHill has a duplicate in gocomics
|
||||
# END AUTOUPDATE
|
||||
|
|
|
@ -61,7 +61,7 @@ Overrides = {
|
|||
|
||||
|
||||
# do not edit anything below since these entries are generated from scripts/update.sh
|
||||
# DO NOT REMOVE
|
||||
# START AUTOUPDATE
|
||||
add('12_Men_Died_Making_This_Strip', '12_Men_Died_Making_This_Strip')
|
||||
add('1337_Joe_and_Fellow_Seth', '1337_Joe_and_Fellow_Seth')
|
||||
add('20_Galaxies', '20_Galaxies')
|
||||
|
@ -1295,3 +1295,4 @@ add('Yamete_Kudasai', 'Yamete_Kudasai')
|
|||
add('Yaoi_Seth', 'Yaoi_Seth')
|
||||
add('Yeah_wait_what', 'Yeah_wait_what')
|
||||
add('Yoshi_Saga', 'Yoshi_Saga')
|
||||
# END AUTOUPDATE
|
||||
|
|
|
@ -620,7 +620,7 @@ class GCYouGuysAreMyFriendsTheComic(_GoComics):
|
|||
|
||||
# do not edit anything below since these entries are generated from
|
||||
# scripts/update_plugins.sh
|
||||
# DO NOT REMOVE
|
||||
# START AUTOUPDATE
|
||||
|
||||
|
||||
class GC060(_GoComics):
|
||||
|
@ -3481,3 +3481,4 @@ class GCZombieHeights(_GoComics):
|
|||
|
||||
class GCZootopia(_GoComics):
|
||||
path = 'zootopia'
|
||||
# END AUTOUPDATE
|
||||
|
|
|
@ -32,7 +32,7 @@ class KeenSpot(_BasicScraper):
|
|||
return [
|
||||
# do not edit anything below since these entries are generated from
|
||||
# scripts/update_plugins.sh
|
||||
# DO NOT REMOVE
|
||||
# START AUTOUPDATE
|
||||
cls('27TwentySeven', 'twenty-seven'),
|
||||
cls('Adventurers', 'adventurers'),
|
||||
cls('AntiheroForHire', 'antihero'),
|
||||
|
@ -76,4 +76,5 @@ class KeenSpot(_BasicScraper):
|
|||
cls('TheHuntersofSalamanstra', 'salamanstra'),
|
||||
cls('TheLounge', 'thelounge'),
|
||||
cls('WICKEDPOWERED', 'wickedpowered'),
|
||||
# END AUTOUPDATE
|
||||
]
|
||||
|
|
|
@ -85,7 +85,7 @@ class _SmackJeeves(_ParserScraper):
|
|||
|
||||
# do not edit anything below since these entries are generated from
|
||||
# scripts/update_plugins.sh
|
||||
# DO NOT REMOVE
|
||||
# START AUTOUPDATE
|
||||
|
||||
|
||||
class SJ20TimesKirby(_SmackJeeves):
|
||||
|
@ -2752,3 +2752,4 @@ class SJZeldaTheNewAdventureofLinkIIMajorasMask(_SmackJeeves):
|
|||
|
||||
class SJ_A_(_SmackJeeves):
|
||||
sub = 'a-the-stalker'
|
||||
# END AUTOUPDATE
|
||||
|
|
|
@ -8,6 +8,7 @@ from __future__ import absolute_import, division, print_function
|
|||
from .common import _WordPressScraper, WP_LATEST_SEARCH
|
||||
from ..helpers import indirectStarter
|
||||
|
||||
|
||||
class _WebcomicFactory(_WordPressScraper):
|
||||
starter = indirectStarter
|
||||
latestSearch = WP_LATEST_SEARCH
|
||||
|
@ -15,7 +16,7 @@ class _WebcomicFactory(_WordPressScraper):
|
|||
|
||||
# do not edit anything below since these entries are generated from
|
||||
# scripts/update_plugins.sh
|
||||
# DO NOT REMOVE
|
||||
# START AUTOUPDATE
|
||||
|
||||
|
||||
class AsTheMayoTurns(_WebcomicFactory):
|
||||
|
@ -166,3 +167,4 @@ class WeirdBikerTales(_WebcomicFactory):
|
|||
class WillysSpaceDive(_WebcomicFactory):
|
||||
url = 'http://www.thewebcomicfactory.com/comic/willys-space-dive/'
|
||||
firstStripUrl = url
|
||||
# END AUTOUPDATE
|
||||
|
|
|
@ -35,7 +35,7 @@ class ArcamaxUpdater(ComicListUpdater):
|
|||
"""Parse all search result pages."""
|
||||
self.handle_url('http://www.arcamax.com/comics')
|
||||
|
||||
def get_classdef(self, name, entry):
|
||||
def get_entry(self, name, entry):
|
||||
return u"class %s(_Arcamax):\n path = %r" % (name, entry)
|
||||
|
||||
|
||||
|
|
|
@ -163,7 +163,7 @@ class ComicFuryUpdater(ComicListUpdater):
|
|||
page += 1
|
||||
print(last_count, file=sys.stderr, end=" ")
|
||||
|
||||
def get_classdef(self, name, entry):
|
||||
def get_entry(self, name, entry):
|
||||
url, active, lang = entry
|
||||
langopt = ''
|
||||
if lang != "english":
|
||||
|
|
|
@ -36,7 +36,7 @@ class CreatorsUpdater(ComicListUpdater):
|
|||
self.handle_url('https://www.creators.com/categories/comics/all')
|
||||
self.handle_url('https://www.creators.com/categories/cartoons/all')
|
||||
|
||||
def get_classdef(self, name, data):
|
||||
def get_entry(self, name, data):
|
||||
lang = 'Es' if name.lower().endswith('spanish') else ''
|
||||
return u"class %s(_Creators%s):\n path = %r" % (name, lang, data)
|
||||
|
||||
|
|
|
@ -51,7 +51,7 @@ class GoComicsUpdater(ComicListUpdater):
|
|||
self.handle_url('http://www.gocomics.com/explore/editorial_list')
|
||||
self.handle_url('http://www.gocomics.com/explore/sherpa_list')
|
||||
|
||||
def get_classdef(self, name, url):
|
||||
def get_entry(self, name, url):
|
||||
return u"class GC%s(_GoComics%s):\n path = %r" % (
|
||||
name, 'Es' if 'espanol/' in url else '', url[1:])
|
||||
|
||||
|
|
|
@ -1,24 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
||||
"""Remove all lines after a given marker line."""
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
import fileinput
|
||||
import sys
|
||||
|
||||
|
||||
def main(args):
    """Remove lines after marker.

    ``args[0]`` is the file to rewrite in place and ``args[1]`` is the marker
    prefix. Every line up to and including the first line that starts with
    the marker is kept (with trailing whitespace stripped); everything after
    that line is dropped. If no line starts with the marker, the whole file
    is kept.
    """
    filename = args[0]
    marker = args[1]
    try:
        # With inplace=True, fileinput redirects sys.stdout into the file,
        # so print() below writes the kept lines back.
        for line in fileinput.input(filename, inplace=True):
            print(line.rstrip())
            if line.startswith(marker):
                # Stop copying: the remaining lines are never printed and
                # therefore do not end up in the rewritten file.
                break
    finally:
        # Leaving the loop early does not end the in-place session by
        # itself; close() flushes the output file and restores sys.stdout.
        fileinput.close()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main(sys.argv[1:])
|
|
@ -11,6 +11,11 @@ import sys
|
|||
import json
|
||||
import codecs
|
||||
|
||||
try:
|
||||
from os import replace as rename
|
||||
except ImportError:
|
||||
from os import rename
|
||||
|
||||
import requests
|
||||
from lxml import html
|
||||
|
||||
|
@ -28,6 +33,9 @@ class ComicListUpdater(object):
|
|||
dup_templates = ()
|
||||
excluded_comics = ()
|
||||
|
||||
START = "# START AUTOUPDATE"
|
||||
END = "# END AUTOUPDATE"
|
||||
|
||||
def __init__(self, name):
|
||||
self.json = name.replace(".py", ".json")
|
||||
self.session = requests.Session()
|
||||
|
@ -79,22 +87,48 @@ class ComicListUpdater(object):
|
|||
comic strips."""
|
||||
min_comics, filename = args
|
||||
min_comics = int(min_comics)
|
||||
with codecs.open(filename, 'a', 'utf-8') as fp:
|
||||
oldf = codecs.open(filename, 'r', 'utf-8')
|
||||
newf = codecs.open(filename + '.new', 'w', 'utf-8')
|
||||
with oldf, newf:
|
||||
indent = self.copy_until_start(oldf, newf)
|
||||
with codecs.open(self.json, 'rb', 'utf-8') as f:
|
||||
data = json.load(f)
|
||||
for name, entry in sorted(data.items(), key=first_lower):
|
||||
if name in self.excluded_comics:
|
||||
continue
|
||||
count = entry['count']
|
||||
if count and count < min_comics:
|
||||
continue
|
||||
dup = self.find_dups(name)
|
||||
if dup is not None:
|
||||
fp.write(u"# %s has a duplicate in %s\n" % (name, dup))
|
||||
else:
|
||||
fp.write(u"\n\n%s\n" %
|
||||
self.get_classdef(truncate_name(name),
|
||||
entry['data']))
|
||||
self.write_entry(newf, name, entry, min_comics, indent)
|
||||
self.copy_after_end(oldf, newf)
|
||||
rename(filename + '.new', filename)
|
||||
|
||||
def copy_until_start(self, src, dest):
    """Copy ``src`` to ``dest`` up to and including the start marker line.

    Returns the column at which ``self.START`` begins on that line.
    Raises RuntimeError when ``src`` ends without such a line.
    """
    for text in src:
        dest.write(text)
        if not text.strip().startswith(self.START):
            continue
        # The marker's offset within the raw line is its indentation.
        return text.find(self.START)
    else:
        raise RuntimeError("can't find start marker!")
|
||||
|
||||
def copy_after_end(self, src, dest):
    """Copy the end marker line and everything after it from ``src``.

    Lines read before the first line starting with ``self.END`` (ignoring
    surrounding whitespace) are discarded. Raises RuntimeError when no such
    line is found.
    """
    found = False
    for text in src:
        # Once the marker has been seen, every later line is copied too.
        found = found or text.strip().startswith(self.END)
        if found:
            dest.write(text)
    if not found:
        raise RuntimeError("can't find end marker!")
|
||||
|
||||
def write_entry(self, fp, name, entry, min_comics, indent):
    """Write the generated text for one comic to ``fp``.

    Nothing is written when ``name`` is listed in ``self.excluded_comics``
    or when ``entry['count']`` is non-zero and below ``min_comics``. If
    ``self.find_dups(name)`` reports a duplicate, a comment naming it is
    written; otherwise the text returned by ``self.get_entry`` is written.

    Every non-empty output line is prefixed with ``indent`` spaces. Empty
    lines are written without the prefix so that multi-line entries do not
    leave whitespace-only lines in the generated module.
    """
    if name in self.excluded_comics:
        return
    count = entry['count']
    # A count of 0 is falsy and is never filtered out by the threshold.
    if count and count < min_comics:
        return
    dup = self.find_dups(name)
    if dup is not None:
        text = u"# %s has a duplicate in %s" % (name, dup)
    else:
        text = self.get_entry(truncate_name(name), entry['data'])
    pad = u" " * indent
    for line in text.split(u"\n"):
        fp.write((pad + line if line else line) + u"\n")
|
||||
|
||||
def find_dups(self, name):
|
||||
"""Check if comic name already exists."""
|
||||
|
@ -106,7 +140,8 @@ class ComicListUpdater(object):
|
|||
return scraperobj.name
|
||||
return None
|
||||
|
||||
def get_classdef(self, name, data):
|
||||
def get_entry(self, name, data):
|
||||
"""Return an entry for the module generator."""
|
||||
raise NotImplementedError
|
||||
|
||||
def run(self):
|
||||
|
|
|
@ -162,7 +162,7 @@ class SmackJeevesUpdater(ComicListUpdater):
|
|||
print(last_count, file=sys.stderr, end=" ")
|
||||
next_url, last_count = self.handle_url(next_url)
|
||||
|
||||
def get_classdef(self, name, data):
|
||||
def get_entry(self, name, data):
|
||||
sub, top = urlsplit(data[0]).hostname.split('.', 1)
|
||||
cl = u"class SJ%s(_SmackJeeves):" % name
|
||||
if top.lower() == "smackjeeves.com":
|
||||
|
|
|
@ -14,6 +14,5 @@ fi
|
|||
# Regenerate the auto-updated section of every selected plugin module.
# Each updater script rewrites the section between the START/END AUTOUPDATE
# markers itself, so no separate truncation step is run beforehand.
for script in $list; do
    target="${d}/../dosagelib/plugins/${script}.py"
    echo "Updating $target"
    "${d}/${script}.py" $mincomics "$target"
done
|
||||
|
|
|
@ -39,7 +39,7 @@ class WebComicFactoryUpdater(ComicListUpdater):
|
|||
comicurl = self.find_first(comicurl)
|
||||
self.add_comic(name, comicurl)
|
||||
|
||||
def get_classdef(self, name, url):
|
||||
def get_entry(self, name, url):
|
||||
return (u"class %s(_WebcomicFactory):\n url = %r\n" % (name, url) +
|
||||
u" firstStripUrl = url")
|
||||
|
||||
|
|
Loading…
Reference in a new issue