1 files changed, 199 insertions, 0 deletions
diff --git a/net-misc/streamlink/files/streamlink-6.4.0-libxml2-2.12.0.patch b/net-misc/streamlink/files/streamlink-6.4.0-libxml2-2.12.0.patch
new file mode 100644
index 000000000000..ed5fd30366d2
--- /dev/null
+++ b/net-misc/streamlink/files/streamlink-6.4.0-libxml2-2.12.0.patch
@@ -0,0 +1,199 @@
+https://github.com/streamlink/streamlink/commit/9d8156dd794ee0919297cd90d85bcc11b8a28358
+
+From 9d8156dd794ee0919297cd90d85bcc11b8a28358 Mon Sep 17 00:00:00 2001
+From: bastimeyer <mail@bastimeyer.de>
+Date: Tue, 21 Nov 2023 20:10:47 +0100
+Subject: [PATCH] utils.parse: fix libxml2 2.12.0 compatibility
+
+---
+ src/streamlink/compat.py      |  11 ++++
+ src/streamlink/utils/parse.py |  17 +++++-
+ tests/utils/test_parse.py     | 112 ++++++++++++++++++++++++++--------
+ 3 files changed, 114 insertions(+), 26 deletions(-)
+
+diff --git a/src/streamlink/compat.py b/src/streamlink/compat.py
+index c75201544d3..993bce64cfd 100644
+--- a/src/streamlink/compat.py
++++ b/src/streamlink/compat.py
+@@ -2,11 +2,22 @@
+ import sys
+ 
+ 
++# compatibility import of charset_normalizer/chardet via requests<3.0
++try:
++    from requests.compat import chardet as charset_normalizer  # type: ignore
++except ImportError:  # pragma: no cover
++    import charset_normalizer
++
++
+ is_darwin = sys.platform == "darwin"
+ is_win32 = os.name == "nt"
+ 
+ 
++detect_encoding = charset_normalizer.detect
++
++
+ __all__ = [
+     "is_darwin",
+     "is_win32",
++    "detect_encoding",
+ ]
+diff --git a/src/streamlink/utils/parse.py b/src/streamlink/utils/parse.py
+index 8c9f79c8b51..17479b81f59 100644
+--- a/src/streamlink/utils/parse.py
++++ b/src/streamlink/utils/parse.py
+@@ -4,6 +4,7 @@
+ 
+ from lxml.etree import HTML, XML
+ 
++from streamlink.compat import detect_encoding
+ from streamlink.plugin import PluginError
+ 
+ 
+@@ -51,7 +52,21 @@ def parse_html(
+      - Removes XML declarations of invalid XHTML5 documents
+      - Wraps errors in custom exception with a snippet of the data in the message
+     """
+-    if isinstance(data, str) and data.lstrip().startswith("<?xml"):
++    # strip XML text declarations from XHTML5 documents which were incorrectly defined as HTML5
++    is_bytes = isinstance(data, bytes)
++    if data and data.lstrip()[:5].lower() == (b"<?xml" if is_bytes else "<?xml"):
++        if is_bytes:
++            # get the document's encoding using the "encoding" attribute value of the XML text declaration
++            match = re.match(rb"^\s*<\?xml\s.*?encoding=(?P<q>[\'\"])(?P<encoding>.+?)(?P=q).*?\?>", data, re.IGNORECASE)
++            if match:
++                encoding_value = detect_encoding(match["encoding"])["encoding"]
++                encoding = match["encoding"].decode(encoding_value)
++            else:
++                # no "encoding" attribute: try to figure out encoding from the document's content
++                encoding = detect_encoding(data)["encoding"]
++
++            data = data.decode(encoding)
++
+         data = re.sub(r"^\s*<\?xml.+?\?>", "", data)
+ 
+     return _parse(HTML, data, name, exception, schema, *args, **kwargs)
+diff --git a/tests/utils/test_parse.py b/tests/utils/test_parse.py
+index aedae7d4e8e..69c16f282b9 100644
+--- a/tests/utils/test_parse.py
++++ b/tests/utils/test_parse.py
+@@ -74,31 +74,93 @@ def test_parse_xml_entities(self):
+         assert actual.tag == expected.tag
+         assert actual.attrib == expected.attrib
+ 
+-    def test_parse_xml_encoding(self):
+-        tree = parse_xml("""<?xml version="1.0" encoding="UTF-8"?><test>ä</test>""")
+-        assert tree.xpath(".//text()") == ["ä"]
+-        tree = parse_xml("""<test>ä</test>""")
+-        assert tree.xpath(".//text()") == ["ä"]
+-        tree = parse_xml(b"""<?xml version="1.0" encoding="UTF-8"?><test>\xC3\xA4</test>""")
+-        assert tree.xpath(".//text()") == ["ä"]
+-        tree = parse_xml(b"""<test>\xC3\xA4</test>""")
+-        assert tree.xpath(".//text()") == ["ä"]
+-
+-    def test_parse_html_encoding(self):
+-        tree = parse_html("""<!DOCTYPE html><html><head><meta charset="utf-8"/></head><body>ä</body></html>""")
+-        assert tree.xpath(".//body/text()") == ["ä"]
+-        tree = parse_html("""<!DOCTYPE html><html><body>ä</body></html>""")
+-        assert tree.xpath(".//body/text()") == ["ä"]
+-        tree = parse_html(b"""<!DOCTYPE html><html><meta charset="utf-8"/><body>\xC3\xA4</body></html>""")
+-        assert tree.xpath(".//body/text()") == ["ä"]
+-        tree = parse_html(b"""<!DOCTYPE html><html><body>\xC3\xA4</body></html>""")
+-        assert tree.xpath(".//body/text()") == ["Ã¤"]
+-
+-    def test_parse_html_xhtml5(self):
+-        tree = parse_html("""<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE html><html><body>ä?></body></html>""")
+-        assert tree.xpath(".//body/text()") == ["ä?>"]
+-        tree = parse_html(b"""<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE html><html><body>\xC3\xA4?></body></html>""")
+-        assert tree.xpath(".//body/text()") == ["ä?>"]
++    @pytest.mark.parametrize(("content", "expected"), [
++        pytest.param(
++            """<?xml version="1.0" encoding="UTF-8"?><test>ä</test>""",
++            "ä",
++            id="string-utf-8",
++        ),
++        pytest.param(
++            """<test>ä</test>""",
++            "ä",
++            id="string-unknown",
++        ),
++        pytest.param(
++            b"""<?xml version="1.0" encoding="UTF-8"?><test>\xC3\xA4</test>""",
++            "ä",
++            id="bytes-utf-8",
++        ),
++        pytest.param(
++            b"""<?xml version="1.0" encoding="ISO-8859-1"?><test>\xE4</test>""",
++            "ä",
++            id="bytes-iso-8859-1",
++        ),
++        pytest.param(
++            b"""<test>\xC3\xA4</test>""",
++            "ä",
++            id="bytes-unknown",
++        ),
++    ])
++    def test_parse_xml_encoding(self, content, expected):
++        tree = parse_xml(content)
++        assert tree.xpath(".//text()") == [expected]
++
++    @pytest.mark.parametrize(("content", "expected"), [
++        pytest.param(
++            """<!DOCTYPE html><html><head><meta charset="utf-8"/></head><body>ä</body></html>""",
++            "ä",
++            id="string-utf-8",
++        ),
++        pytest.param(
++            """<!DOCTYPE html><html><body>ä</body></html>""",
++            "ä",
++            id="string-unknown",
++        ),
++        pytest.param(
++            b"""<!DOCTYPE html><html><head><meta charset="utf-8"/></head><body>\xC3\xA4</body></html>""",
++            "ä",
++            id="bytes-utf-8",
++        ),
++        pytest.param(
++            b"""<!DOCTYPE html><html><head><meta charset="ISO-8859-1"/></head><body>\xE4</body></html>""",
++            "ä",
++            id="bytes-iso-8859-1",
++        ),
++        pytest.param(
++            b"""<!DOCTYPE html><html><body>\xC3\xA4</body></html>""",
++            "Ã¤",
++            id="bytes-unknown",
++        ),
++    ])
++    def test_parse_html_encoding(self, content, expected):
++        tree = parse_html(content)
++        assert tree.xpath(".//body/text()") == [expected]
++
++    @pytest.mark.parametrize(("content", "expected"), [
++        pytest.param(
++            """<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE html><html><body>ä?></body></html>""",
++            "ä?>",
++            id="string",
++        ),
++        pytest.param(
++            b"""<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE html><html><body>\xC3\xA4?></body></html>""",
++            "ä?>",
++            id="bytes-utf-8",
++        ),
++        pytest.param(
++            b"""<?xml version="1.0" encoding="ISO-8859-1"?><!DOCTYPE html><html><body>\xE4?></body></html>""",
++            "ä?>",
++            id="bytes-iso-8859-1",
++        ),
++        pytest.param(
++            b"""<?xml version="1.0"?><!DOCTYPE html><html><body>\xC3\xA4?></body></html>""",
++            "ä?>",
++            id="bytes-unknown",
++        ),
++    ])
++    def test_parse_html_xhtml5(self, content, expected):
++        tree = parse_html(content)
++        assert tree.xpath(".//body/text()") == [expected]
+ 
+     def test_parse_qsd(self):
+         assert parse_qsd("test=1&foo=bar", schema=validate.Schema({"test": str, "foo": "bar"})) == {"test": "1", "foo": "bar"}
+