Minor cleanups for new modules (see #84).

This commit is contained in:
Tobias Gruetzmacher 2017-04-16 01:28:17 +02:00
parent 233da3e052
commit 593975d907
2 changed files with 12 additions and 17 deletions

View file

@ -496,19 +496,16 @@ class SupernormalStep(_ComicControlScraper):
class SurvivingTheWorld(_ParserScraper):
    # Scraper definition for http://survivingtheworld.net/.
    url = 'http://survivingtheworld.net/'
    # The index is the page name without its ".html" suffix (e.g. "Lesson1");
    # the suffix lives in stripUrl, so no getIndexStripUrl override is needed.
    stripUrl = url + '%s.html'
    firstStripUrl = stripUrl % 'Lesson1'
    # Alternative XPath expressions for locating the strip image(s).
    imageSearch = (
        '//div[@class="img"]/img',  # When there's one image per strip
        '//div[@class="img"]/p/img',  # When there's multiple images per strip
        '//td/img'  # Special case for Lesson1296.html
    )
    # Alternative XPath expressions for locating the "previous" link.
    prevSearch = (
        '//li[@class="previous"]/a',
        '//td/a'  # Special case for Lesson1296.html
    )
    multipleImagesPerStrip = True
    help = 'Index format: name'

View file

@ -149,22 +149,20 @@ class TracyAndTristan(_BasicScraper):
class TumbleDryComics(_WordPressScraper):
    # Scraper definition for http://tumbledrycomics.com/.
    url = 'http://tumbledrycomics.com/'
    # Strip pages live under "comic/<name>/"; firstStripUrl is derived from
    # stripUrl so the two cannot drift apart.
    stripUrl = url + 'comic/%s/'
    firstStripUrl = stripUrl % 'we-need-to-get-high-jpg'
    textSearch = '//div[@id="comic"]//img/@alt'
    multipleImagesPerStrip = True
    adult = True
    help = 'Index format: name'

    def namer(self, image_url, page_url):
        """Return the file name to use for the image at *image_url*.

        The last three path components of *image_url* are read as year,
        month and file name (presumably the upload path layout - confirm
        against the live site). If the file name does not already start
        with the year, it is prefixed with "<year>-<month>-".

        *page_url* is accepted but not used.
        """
        # Most images have the date they were posted in the filename
        # For those that don't we can get the month and year from the image url
        # Splitting from the right keeps this independent of how many path
        # components precede the year in the URL.
        parts = image_url.rsplit('/', 3)
        year = parts[1]
        month = parts[2]
        filename = parts[3]
        if not filename.startswith(year):
            filename = year + "-" + month + "-" + filename
        return filename