summary | refs | log | tree | commit | diff
path: root/net-misc/streamlink/files/streamlink-6.4.0-libxml2-2.12.0.patch
diff options
context:
space:
mode:
Diffstat (limited to 'net-misc/streamlink/files/streamlink-6.4.0-libxml2-2.12.0.patch')
-rw-r--r-- net-misc/streamlink/files/streamlink-6.4.0-libxml2-2.12.0.patch | 199
1 files changed, 199 insertions, 0 deletions
diff --git a/net-misc/streamlink/files/streamlink-6.4.0-libxml2-2.12.0.patch b/net-misc/streamlink/files/streamlink-6.4.0-libxml2-2.12.0.patch
new file mode 100644
index 000000000000..ed5fd30366d2
--- /dev/null
+++ b/net-misc/streamlink/files/streamlink-6.4.0-libxml2-2.12.0.patch
@@ -0,0 +1,199 @@
+https://github.com/streamlink/streamlink/commit/9d8156dd794ee0919297cd90d85bcc11b8a28358
+
+From 9d8156dd794ee0919297cd90d85bcc11b8a28358 Mon Sep 17 00:00:00 2001
+From: bastimeyer <mail@bastimeyer.de>
+Date: Tue, 21 Nov 2023 20:10:47 +0100
+Subject: [PATCH] utils.parse: fix libxml2 2.12.0 compatibility
+
+---
+ src/streamlink/compat.py | 11 ++++
+ src/streamlink/utils/parse.py | 17 +++++-
+ tests/utils/test_parse.py | 112 ++++++++++++++++++++++++++--------
+ 3 files changed, 114 insertions(+), 26 deletions(-)
+
+diff --git a/src/streamlink/compat.py b/src/streamlink/compat.py
+index c75201544d3..993bce64cfd 100644
+--- a/src/streamlink/compat.py
++++ b/src/streamlink/compat.py
+@@ -2,11 +2,22 @@
+ import sys
+
+
++# compatibility import of charset_normalizer/chardet via requests<3.0
++try:
++ from requests.compat import chardet as charset_normalizer # type: ignore
++except ImportError: # pragma: no cover
++ import charset_normalizer
++
++
+ is_darwin = sys.platform == "darwin"
+ is_win32 = os.name == "nt"
+
+
++detect_encoding = charset_normalizer.detect
++
++
+ __all__ = [
+ "is_darwin",
+ "is_win32",
++ "detect_encoding",
+ ]
+diff --git a/src/streamlink/utils/parse.py b/src/streamlink/utils/parse.py
+index 8c9f79c8b51..17479b81f59 100644
+--- a/src/streamlink/utils/parse.py
++++ b/src/streamlink/utils/parse.py
+@@ -4,6 +4,7 @@
+
+ from lxml.etree import HTML, XML
+
++from streamlink.compat import detect_encoding
+ from streamlink.plugin import PluginError
+
+
+@@ -51,7 +52,21 @@ def parse_html(
+ - Removes XML declarations of invalid XHTML5 documents
+ - Wraps errors in custom exception with a snippet of the data in the message
+ """
+- if isinstance(data, str) and data.lstrip().startswith("<?xml"):
++ # strip XML text declarations from XHTML5 documents which were incorrectly defined as HTML5
++ is_bytes = isinstance(data, bytes)
++ if data and data.lstrip()[:5].lower() == (b"<?xml" if is_bytes else "<?xml"):
++ if is_bytes:
++ # get the document's encoding using the "encoding" attribute value of the XML text declaration
++ match = re.match(rb"^\s*<\?xml\s.*?encoding=(?P<q>[\'\"])(?P<encoding>.+?)(?P=q).*?\?>", data, re.IGNORECASE)
++ if match:
++ encoding_value = detect_encoding(match["encoding"])["encoding"]
++ encoding = match["encoding"].decode(encoding_value)
++ else:
++ # no "encoding" attribute: try to figure out encoding from the document's content
++ encoding = detect_encoding(data)["encoding"]
++
++ data = data.decode(encoding)
++
+ data = re.sub(r"^\s*<\?xml.+?\?>", "", data)
+
+ return _parse(HTML, data, name, exception, schema, *args, **kwargs)
+diff --git a/tests/utils/test_parse.py b/tests/utils/test_parse.py
+index aedae7d4e8e..69c16f282b9 100644
+--- a/tests/utils/test_parse.py
++++ b/tests/utils/test_parse.py
+@@ -74,31 +74,93 @@ def test_parse_xml_entities(self):
+ assert actual.tag == expected.tag
+ assert actual.attrib == expected.attrib
+
+- def test_parse_xml_encoding(self):
+- tree = parse_xml("""<?xml version="1.0" encoding="UTF-8"?><test>ä</test>""")
+- assert tree.xpath(".//text()") == ["ä"]
+- tree = parse_xml("""<test>ä</test>""")
+- assert tree.xpath(".//text()") == ["ä"]
+- tree = parse_xml(b"""<?xml version="1.0" encoding="UTF-8"?><test>\xC3\xA4</test>""")
+- assert tree.xpath(".//text()") == ["ä"]
+- tree = parse_xml(b"""<test>\xC3\xA4</test>""")
+- assert tree.xpath(".//text()") == ["ä"]
+-
+- def test_parse_html_encoding(self):
+- tree = parse_html("""<!DOCTYPE html><html><head><meta charset="utf-8"/></head><body>ä</body></html>""")
+- assert tree.xpath(".//body/text()") == ["ä"]
+- tree = parse_html("""<!DOCTYPE html><html><body>ä</body></html>""")
+- assert tree.xpath(".//body/text()") == ["ä"]
+- tree = parse_html(b"""<!DOCTYPE html><html><meta charset="utf-8"/><body>\xC3\xA4</body></html>""")
+- assert tree.xpath(".//body/text()") == ["ä"]
+- tree = parse_html(b"""<!DOCTYPE html><html><body>\xC3\xA4</body></html>""")
+- assert tree.xpath(".//body/text()") == ["ä"]
+-
+- def test_parse_html_xhtml5(self):
+- tree = parse_html("""<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE html><html><body>ä?></body></html>""")
+- assert tree.xpath(".//body/text()") == ["ä?>"]
+- tree = parse_html(b"""<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE html><html><body>\xC3\xA4?></body></html>""")
+- assert tree.xpath(".//body/text()") == ["ä?>"]
++ @pytest.mark.parametrize(("content", "expected"), [
++ pytest.param(
++ """<?xml version="1.0" encoding="UTF-8"?><test>ä</test>""",
++ "ä",
++ id="string-utf-8",
++ ),
++ pytest.param(
++ """<test>ä</test>""",
++ "ä",
++ id="string-unknown",
++ ),
++ pytest.param(
++ b"""<?xml version="1.0" encoding="UTF-8"?><test>\xC3\xA4</test>""",
++ "ä",
++ id="bytes-utf-8",
++ ),
++ pytest.param(
++ b"""<?xml version="1.0" encoding="ISO-8859-1"?><test>\xE4</test>""",
++ "ä",
++ id="bytes-iso-8859-1",
++ ),
++ pytest.param(
++ b"""<test>\xC3\xA4</test>""",
++ "ä",
++ id="bytes-unknown",
++ ),
++ ])
++ def test_parse_xml_encoding(self, content, expected):
++ tree = parse_xml(content)
++ assert tree.xpath(".//text()") == [expected]
++
++ @pytest.mark.parametrize(("content", "expected"), [
++ pytest.param(
++ """<!DOCTYPE html><html><head><meta charset="utf-8"/></head><body>ä</body></html>""",
++ "ä",
++ id="string-utf-8",
++ ),
++ pytest.param(
++ """<!DOCTYPE html><html><body>ä</body></html>""",
++ "ä",
++ id="string-unknown",
++ ),
++ pytest.param(
++ b"""<!DOCTYPE html><html><head><meta charset="utf-8"/></head><body>\xC3\xA4</body></html>""",
++ "ä",
++ id="bytes-utf-8",
++ ),
++ pytest.param(
++ b"""<!DOCTYPE html><html><head><meta charset="ISO-8859-1"/></head><body>\xE4</body></html>""",
++ "ä",
++ id="bytes-iso-8859-1",
++ ),
++ pytest.param(
++ b"""<!DOCTYPE html><html><body>\xC3\xA4</body></html>""",
++ "ä",
++ id="bytes-unknown",
++ ),
++ ])
++ def test_parse_html_encoding(self, content, expected):
++ tree = parse_html(content)
++ assert tree.xpath(".//body/text()") == [expected]
++
++ @pytest.mark.parametrize(("content", "expected"), [
++ pytest.param(
++ """<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE html><html><body>ä?></body></html>""",
++ "ä?>",
++ id="string",
++ ),
++ pytest.param(
++ b"""<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE html><html><body>\xC3\xA4?></body></html>""",
++ "ä?>",
++ id="bytes-utf-8",
++ ),
++ pytest.param(
++ b"""<?xml version="1.0" encoding="ISO-8859-1"?><!DOCTYPE html><html><body>\xE4?></body></html>""",
++ "ä?>",
++ id="bytes-iso-8859-1",
++ ),
++ pytest.param(
++ b"""<?xml version="1.0"?><!DOCTYPE html><html><body>\xC3\xA4?></body></html>""",
++ "ä?>",
++ id="bytes-unknown",
++ ),
++ ])
++ def test_parse_html_xhtml5(self, content, expected):
++ tree = parse_html(content)
++ assert tree.xpath(".//body/text()") == [expected]
+
+ def test_parse_qsd(self):
+ assert parse_qsd("test=1&foo=bar", schema=validate.Schema({"test": str, "foo": "bar"})) == {"test": "1", "foo": "bar"}
+