From 67d1ee281bdcbedba50a3674597067643b40824f Mon Sep 17 00:00:00 2001
From: Tobias Gruetzmacher
Date: Mon, 6 Jun 2022 14:11:07 +0200
Subject: [PATCH] Ignore "usemap" attribute on images

---
 dosagelib/scraper.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/dosagelib/scraper.py b/dosagelib/scraper.py
index d65c8be09..7078f842b 100644
--- a/dosagelib/scraper.py
+++ b/dosagelib/scraper.py
@@ -12,7 +12,7 @@ from urllib.parse import urljoin
 from typing import Collection, Dict, List, Optional, Type, Union, Pattern, Sequence
 
 import lxml
-from lxml.html.defs import link_attrs as html_link_attrs
+from lxml.html.defs import link_attrs as lxml_link_attrs
 
 try:
     import cssselect
@@ -34,6 +34,7 @@ from .xml import NS
 
 
 ARCHIVE_ORG_URL = re.compile(r'https?://web\.archive\.org/web/[^/]*/')
+html_link_attrs = lxml_link_attrs - {'usemap'}
 
 
 if lxml.etree.LIBXML_VERSION < (2, 9, 3):