diff options
Diffstat (limited to 'net-misc/streamlink/files/streamlink-6.4.0-libxml2-2.12.0.patch')
-rw-r--r-- | net-misc/streamlink/files/streamlink-6.4.0-libxml2-2.12.0.patch | 199 |
1 files changed, 0 insertions, 199 deletions
diff --git a/net-misc/streamlink/files/streamlink-6.4.0-libxml2-2.12.0.patch b/net-misc/streamlink/files/streamlink-6.4.0-libxml2-2.12.0.patch deleted file mode 100644 index ed5fd30366d2..000000000000 --- a/net-misc/streamlink/files/streamlink-6.4.0-libxml2-2.12.0.patch +++ /dev/null @@ -1,199 +0,0 @@ -https://github.com/streamlink/streamlink/commit/9d8156dd794ee0919297cd90d85bcc11b8a28358 - -From 9d8156dd794ee0919297cd90d85bcc11b8a28358 Mon Sep 17 00:00:00 2001 -From: bastimeyer <mail@bastimeyer.de> -Date: Tue, 21 Nov 2023 20:10:47 +0100 -Subject: [PATCH] utils.parse: fix libxml2 2.12.0 compatibility - ---- - src/streamlink/compat.py | 11 ++++ - src/streamlink/utils/parse.py | 17 +++++- - tests/utils/test_parse.py | 112 ++++++++++++++++++++++++++-------- - 3 files changed, 114 insertions(+), 26 deletions(-) - -diff --git a/src/streamlink/compat.py b/src/streamlink/compat.py -index c75201544d3..993bce64cfd 100644 ---- a/src/streamlink/compat.py -+++ b/src/streamlink/compat.py -@@ -2,11 +2,22 @@ - import sys - - -+# compatibility import of charset_normalizer/chardet via requests<3.0 -+try: -+ from requests.compat import chardet as charset_normalizer # type: ignore -+except ImportError: # pragma: no cover -+ import charset_normalizer -+ -+ - is_darwin = sys.platform == "darwin" - is_win32 = os.name == "nt" - - -+detect_encoding = charset_normalizer.detect -+ -+ - __all__ = [ - "is_darwin", - "is_win32", -+ "detect_encoding", - ] -diff --git a/src/streamlink/utils/parse.py b/src/streamlink/utils/parse.py -index 8c9f79c8b51..17479b81f59 100644 ---- a/src/streamlink/utils/parse.py -+++ b/src/streamlink/utils/parse.py -@@ -4,6 +4,7 @@ - - from lxml.etree import HTML, XML - -+from streamlink.compat import detect_encoding - from streamlink.plugin import PluginError - - -@@ -51,7 +52,21 @@ def parse_html( - - Removes XML declarations of invalid XHTML5 documents - - Wraps errors in custom exception with a snippet of the data in the message - """ -- if isinstance(data, str) and data.lstrip().startswith("<?xml"): -+ # strip XML text declarations from XHTML5 documents which were incorrectly defined as HTML5 -+ is_bytes = isinstance(data, bytes) -+ if data and data.lstrip()[:5].lower() == (b"<?xml" if is_bytes else "<?xml"): -+ if is_bytes: -+ # get the document's encoding using the "encoding" attribute value of the XML text declaration -+ match = re.match(rb"^\s*<\?xml\s.*?encoding=(?P<q>[\'\"])(?P<encoding>.+?)(?P=q).*?\?>", data, re.IGNORECASE) -+ if match: -+ encoding_value = detect_encoding(match["encoding"])["encoding"] -+ encoding = match["encoding"].decode(encoding_value) -+ else: -+ # no "encoding" attribute: try to figure out encoding from the document's content -+ encoding = detect_encoding(data)["encoding"] -+ -+ data = data.decode(encoding) -+ - data = re.sub(r"^\s*<\?xml.+?\?>", "", data) - - return _parse(HTML, data, name, exception, schema, *args, **kwargs) -diff --git a/tests/utils/test_parse.py b/tests/utils/test_parse.py -index aedae7d4e8e..69c16f282b9 100644 ---- a/tests/utils/test_parse.py -+++ b/tests/utils/test_parse.py -@@ -74,31 +74,93 @@ def test_parse_xml_entities(self): - assert actual.tag == expected.tag - assert actual.attrib == expected.attrib - -- def test_parse_xml_encoding(self): -- tree = parse_xml("""<?xml version="1.0" encoding="UTF-8"?><test>ä</test>""") -- assert tree.xpath(".//text()") == ["ä"] -- tree = parse_xml("""<test>ä</test>""") -- assert tree.xpath(".//text()") == ["ä"] -- tree = parse_xml(b"""<?xml version="1.0" encoding="UTF-8"?><test>\xC3\xA4</test>""") -- assert tree.xpath(".//text()") == ["ä"] -- tree = parse_xml(b"""<test>\xC3\xA4</test>""") -- assert tree.xpath(".//text()") == ["ä"] -- -- def test_parse_html_encoding(self): -- tree = parse_html("""<!DOCTYPE html><html><head><meta charset="utf-8"/></head><body>ä</body></html>""") -- assert tree.xpath(".//body/text()") == ["ä"] -- tree = parse_html("""<!DOCTYPE html><html><body>ä</body></html>""") -- assert tree.xpath(".//body/text()") == ["ä"] -- tree = parse_html(b"""<!DOCTYPE html><html><meta charset="utf-8"/><body>\xC3\xA4</body></html>""") -- assert tree.xpath(".//body/text()") == ["ä"] -- tree = parse_html(b"""<!DOCTYPE html><html><body>\xC3\xA4</body></html>""") -- assert tree.xpath(".//body/text()") == ["ä"] -- -- def test_parse_html_xhtml5(self): -- tree = parse_html("""<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE html><html><body>ä?></body></html>""") -- assert tree.xpath(".//body/text()") == ["ä?>"] -- tree = parse_html(b"""<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE html><html><body>\xC3\xA4?></body></html>""") -- assert tree.xpath(".//body/text()") == ["ä?>"] -+ @pytest.mark.parametrize(("content", "expected"), [ -+ pytest.param( -+ """<?xml version="1.0" encoding="UTF-8"?><test>ä</test>""", -+ "ä", -+ id="string-utf-8", -+ ), -+ pytest.param( -+ """<test>ä</test>""", -+ "ä", -+ id="string-unknown", -+ ), -+ pytest.param( -+ b"""<?xml version="1.0" encoding="UTF-8"?><test>\xC3\xA4</test>""", -+ "ä", -+ id="bytes-utf-8", -+ ), -+ pytest.param( -+ b"""<?xml version="1.0" encoding="ISO-8859-1"?><test>\xE4</test>""", -+ "ä", -+ id="bytes-iso-8859-1", -+ ), -+ pytest.param( -+ b"""<test>\xC3\xA4</test>""", -+ "ä", -+ id="bytes-unknown", -+ ), -+ ]) -+ def test_parse_xml_encoding(self, content, expected): -+ tree = parse_xml(content) -+ assert tree.xpath(".//text()") == [expected] -+ -+ @pytest.mark.parametrize(("content", "expected"), [ -+ pytest.param( -+ """<!DOCTYPE html><html><head><meta charset="utf-8"/></head><body>ä</body></html>""", -+ "ä", -+ id="string-utf-8", -+ ), -+ pytest.param( -+ """<!DOCTYPE html><html><body>ä</body></html>""", -+ "ä", -+ id="string-unknown", -+ ), -+ pytest.param( -+ b"""<!DOCTYPE html><html><head><meta charset="utf-8"/></head><body>\xC3\xA4</body></html>""", -+ "ä", -+ id="bytes-utf-8", -+ ), -+ pytest.param( -+ b"""<!DOCTYPE html><html><head><meta charset="ISO-8859-1"/></head><body>\xE4</body></html>""", -+ "ä", -+ id="bytes-iso-8859-1", -+ ), -+ pytest.param( -+ b"""<!DOCTYPE html><html><body>\xC3\xA4</body></html>""", -+ "ä", -+ id="bytes-unknown", -+ ), -+ ]) -+ def test_parse_html_encoding(self, content, expected): -+ tree = parse_html(content) -+ assert tree.xpath(".//body/text()") == [expected] -+ -+ @pytest.mark.parametrize(("content", "expected"), [ -+ pytest.param( -+ """<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE html><html><body>ä?></body></html>""", -+ "ä?>", -+ id="string", -+ ), -+ pytest.param( -+ b"""<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE html><html><body>\xC3\xA4?></body></html>""", -+ "ä?>", -+ id="bytes-utf-8", -+ ), -+ pytest.param( -+ b"""<?xml version="1.0" encoding="ISO-8859-1"?><!DOCTYPE html><html><body>\xE4?></body></html>""", -+ "ä?>", -+ id="bytes-iso-8859-1", -+ ), -+ pytest.param( -+ b"""<?xml version="1.0"?><!DOCTYPE html><html><body>\xC3\xA4?></body></html>""", -+ "ä?>", -+ id="bytes-unknown", -+ ), -+ ]) -+ def test_parse_html_xhtml5(self, content, expected): -+ tree = parse_html(content) -+ assert tree.xpath(".//body/text()") == [expected] - - def test_parse_qsd(self): - assert parse_qsd("test=1&foo=bar", schema=validate.Schema({"test": str, "foo": "bar"})) == {"test": "1", "foo": "bar"} - |