summary | refs | log | tree | commit | diff
path: root/dev-python/html5lib
diff options
context:
space:
mode:
author V3n3RiX <venerix@redcorelinux.org> 2017-10-09 18:53:29 +0100
committer V3n3RiX <venerix@redcorelinux.org> 2017-10-09 18:53:29 +0100
commit 4f2d7949f03e1c198bc888f2d05f421d35c57e21 (patch)
tree ba5f07bf3f9d22d82e54a462313f5d244036c768 /dev-python/html5lib
reinit the tree, so we can have metadata
Diffstat (limited to 'dev-python/html5lib')
-rw-r--r-- dev-python/html5lib/Manifest 10
-rw-r--r-- dev-python/html5lib/files/html5lib-0.9999999-lxml-3.5.0-backport.patch 117
-rw-r--r-- dev-python/html5lib/files/html5lib-0.9999999-python3.6-sanitizer-re.patch 50
-rw-r--r-- dev-python/html5lib/html5lib-0.9999999-r2.ebuild 32
-rw-r--r-- dev-python/html5lib/html5lib-0.9999999.ebuild 33
-rw-r--r-- dev-python/html5lib/html5lib-0.999999999.ebuild 32
-rw-r--r-- dev-python/html5lib/metadata.xml 17
7 files changed, 291 insertions, 0 deletions
diff --git a/dev-python/html5lib/Manifest b/dev-python/html5lib/Manifest
new file mode 100644
index 000000000000..e67e71e14472
--- /dev/null
+++ b/dev-python/html5lib/Manifest
@@ -0,0 +1,10 @@
+AUX html5lib-0.9999999-lxml-3.5.0-backport.patch 4654 SHA256 c93abd685ee6454100482b1f0a095b05581c5e86f0542aa3bef1d55ed789e3c4 SHA512 bcaddd8dfd71c943b472028b7c21cf913d58e6093d5f15fc280597d3d01e8a7d96a27f976e5b94b6289d0ea6ed4756abaf1bc382a4c48b95c65aedd2992aed05 WHIRLPOOL 2be2e23b075ede2239aa772142af97f85959838c3a75e8aabf391e50624b506ef4ef72076ec9582287b49130feef3e06767ec2542d6f8cccecef61dacc9c5288
+AUX html5lib-0.9999999-python3.6-sanitizer-re.patch 2591 SHA256 3dad183ba2e5501b0910e0132bf32f1bc047e7c3ad809c6cd9645fa7e60bb464 SHA512 c95e2a4a4f1fd1e376a6af7710c6ed94747e54de8d1f1c6233f56405405011c2f9fcebb93e5dfd19589148387a62abe856e7d55b0d0014ca931b337e825c35ce WHIRLPOOL 339422a77c001775fe9c2ce0964104b81d36fe0f6a5e80109f2f0d7ee7bb038002347ac88a2600d8b98cc2e27e3f6b6342bf6d4288a43a8b41812070147b4ffe
+DIST html5lib-0.9999999.tar.gz 889312 SHA256 2612a191a8d5842bfa057e41ba50bbb9dcb722419d2408c78cff4758d0754868 SHA512 1748a1921f4bc93b7f208d89701eeabfe507e40515dd2e88ece239ba7c1704c8ae9dc4eea310164c7b207225dce910f78a8b34f826f4f87a2992365c07089c28 WHIRLPOOL 48dbd278e247e98e2765e7da1ce99474fba241b0a6b70e441899271b0cffffca33720646a35ae724713be5ce5a15617d611ebd76c192755e72485d09d446f5ac
+DIST html5lib-0.999999999.tar.gz 245488 SHA256 ee747c0ffd3028d2722061936b5c65ee4fe13c8e4613519b4447123fc4546298 SHA512 b30d9ea74d0ad03e07ebee0d33caefeed717e6084f2ef44559ebf73948563ffd60e40c774e23c8964a32b0f265d1d1f0b98759861acaa208f28e5ab2813c3ca0 WHIRLPOOL 369e81d83d03636b34901277f53e80373f54ecc15ae948d4a4f04554cd78815b6574759835d76e3105028803e6797a92144a1346e7c826a72269239dc5d105ee
+EBUILD html5lib-0.9999999-r2.ebuild 927 SHA256 ebd6a38a450ac0c223ad02ebd63eb0c8d872bca9ef1b6d4dcead0b186cc320b3 SHA512 adb8a68a96e5d785dfb9490209f0c54e549a322eabc62f8887021754dee30de5df6bcfbb060220d6cf483ac9ad2a1fc9c77db4beaf794f1c1404d0d039e1c716 WHIRLPOOL d8aa600eed1780523774dc3c7b8479b98600efde953908a4a50bc257fe600c37c1fb7ffc0af3c4c584c745c0a97c63670967c466457a686db5d0d858d7ad6ad8
+EBUILD html5lib-0.9999999.ebuild 1095 SHA256 5881d605639d95cac57786ee63cebb1b93f21410317092db39f924286d080dc2 SHA512 a556b56261fda05dce6005e0e8fc9fb58fcc3de79ceef34db2ef4209ca1b8294b7dd4aeb8dc055831de9635e452467d0ed27ebccdfc938f83835d4230ef5e0f0 WHIRLPOOL 650c8ac17e8c3125707936205d5e16caf82047919cb20eb3e9d0b38e31fa3d21728f19f7077856bb51b19edb4f8cdec8003e7857e8f14944a2f46fd0e2303805
+EBUILD html5lib-0.999999999.ebuild 865 SHA256 c8bb362ba1f5220b9fbc3352eed06d88a0ae98ab3992ff5c6872dc2ff93e86f5 SHA512 2b096eb600834fdd5a301548e2e86a8fba9ce993d86b6a9d241d6b48ba6c98ae3b73483705bfff8e17be614b569ab35990c7c89d1ed517f81e3951c7c775f6b6 WHIRLPOOL 9bc5c36dbac107538b0a355dddba6fe588620e0a9e04f32da2629c4ee9850170b44db8590d212563927b17e71ff319ba37c02ef33e5e64b46e1e9a37f09b1011
+MISC ChangeLog 6481 SHA256 83ff0832d1526d6ff37cacf96cab2582fdb8586cd2f9b85c0c60c0e853d009b9 SHA512 1129ccbae3e81a94a1c18e0fa04d8428be1558e12d800587973c337665bf07d63eecbfb11c88db33615884f9486523716947e30d6d05d3b0359f8125792bb72d WHIRLPOOL 311815008f7961868e187fa2596734957238b32b800880c76eb08c0ec36097ebbc01a0ddb1d4f33de49008a79ceb3e69543b81bceab3e4b280890cad1da9d43f
+MISC ChangeLog-2015 6235 SHA256 63019b29710baea895a82cc7ca2c5d5a645b897d9c373db0e2d55ad8b5bbc92c SHA512 adc85d838edfe98374118868d31c405b36a2b3ab76ff07b4afb078ff952498e37140c2848bae72dee1b0ae6069c22c0c94a3da3429178b50788f0833959edcae WHIRLPOOL cacc2d9e9bde8ab8b4da407aa9b4b48b309fce8aa336277b3e94357c55be8f602b15b5aebb97af471880d401ad11390c19911b61d02cf17476a30cb4f760eedb
+MISC metadata.xml 589 SHA256 78e9ccc2c370cbac692bbf3f71fb54029f84fe6c81539fc896f480e6bad36053 SHA512 b619c0adb0c781b00e656b1027465ec19d1ab7d0792475f197ab4164cb8a1c26563dcf48d147e346637789c8ec1603afaea428357c1022c42e22b85967e1694a WHIRLPOOL b3f5cbf79934926f7b8d0ee89480ba963f91a11bcc550c9be9547099f42588a188a8760073abf6916c54bc68df6147fed2d72af758a1d91e48a54cd996ab0e19
diff --git a/dev-python/html5lib/files/html5lib-0.9999999-lxml-3.5.0-backport.patch b/dev-python/html5lib/files/html5lib-0.9999999-lxml-3.5.0-backport.patch
new file mode 100644
index 000000000000..fecfab9a4fb4
--- /dev/null
+++ b/dev-python/html5lib/files/html5lib-0.9999999-lxml-3.5.0-backport.patch
@@ -0,0 +1,117 @@
+From 46046c0f7125911ff8205f09a7574573bb953105 Mon Sep 17 00:00:00 2001
+From: Geoffrey Sneddon <geoffers@gmail.com>
+Date: Mon, 23 Nov 2015 15:17:07 +0000
+Subject: [PATCH 1/3] Make lxml tree-builder coerce comments to work with lxml
+ 3.5.
+
+---
+ html5lib/ihatexml.py | 2 ++
+ html5lib/treebuilders/etree_lxml.py | 2 +-
+ 2 files changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/html5lib/ihatexml.py b/html5lib/ihatexml.py
+index 0fc7930..b5b2e98 100644
+--- a/html5lib/ihatexml.py
++++ b/html5lib/ihatexml.py
+@@ -225,6 +225,8 @@ def coerceComment(self, data):
+ while "--" in data:
+ warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning)
+ data = data.replace("--", "- -")
++ if data.endswith("-"):
++ data += " "
+ return data
+
+ def coerceCharacters(self, data):
+diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py
+index 35d08ef..17007e3 100644
+--- a/html5lib/treebuilders/etree_lxml.py
++++ b/html5lib/treebuilders/etree_lxml.py
+@@ -189,7 +189,7 @@ class TreeBuilder(_base.TreeBuilder):
+
+ def __init__(self, namespaceHTMLElements, fullTree=False):
+ builder = etree_builders.getETreeModule(etree, fullTree=fullTree)
+- infosetFilter = self.infosetFilter = ihatexml.InfosetFilter()
++ infosetFilter = self.infosetFilter = ihatexml.InfosetFilter(preventDoubleDashComments=True)
+ self.namespaceHTMLElements = namespaceHTMLElements
+
+ class Attributes(dict):
+
+From 1c22e1ce93dd4acc81a66cfa03cf9720fbd741c7 Mon Sep 17 00:00:00 2001
+From: Geoffrey Sneddon <geoffers@gmail.com>
+Date: Mon, 23 Nov 2015 15:35:21 +0000
+Subject: [PATCH 2/3] fixup! Make lxml tree-builder coerce comments to work
+ with lxml 3.5.
+
+---
+ html5lib/ihatexml.py | 1 +
+ html5lib/treebuilders/etree_lxml.py | 7 ++++---
+ 2 files changed, 5 insertions(+), 3 deletions(-)
+
+diff --git a/html5lib/ihatexml.py b/html5lib/ihatexml.py
+index b5b2e98..5a81a12 100644
+--- a/html5lib/ihatexml.py
++++ b/html5lib/ihatexml.py
+@@ -226,6 +226,7 @@ def coerceComment(self, data):
+ warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning)
+ data = data.replace("--", "- -")
+ if data.endswith("-"):
++ warnings.warn("Comments cannot contain end in a dash", DataLossWarning)
+ data += " "
+ return data
+
+diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py
+index 17007e3..c6c981f 100644
+--- a/html5lib/treebuilders/etree_lxml.py
++++ b/html5lib/treebuilders/etree_lxml.py
+@@ -54,7 +54,7 @@ def _getChildNodes(self):
+ def testSerializer(element):
+ rv = []
+ finalText = None
+- infosetFilter = ihatexml.InfosetFilter()
++ infosetFilter = ihatexml.InfosetFilter(preventDoubleDashComments=True)
+
+ def serializeElement(element, indent=0):
+ if not hasattr(element, "tag"):
+@@ -257,7 +257,7 @@ def _getData(self):
+ data = property(_getData, _setData)
+
+ self.elementClass = Element
+- self.commentClass = builder.Comment
++ self.commentClass = Comment
+ # self.fragmentClass = builder.DocumentFragment
+ _base.TreeBuilder.__init__(self, namespaceHTMLElements)
+
+@@ -344,7 +344,8 @@ def insertRoot(self, token):
+
+ # Append the initial comments:
+ for comment_token in self.initial_comments:
+- root.addprevious(etree.Comment(comment_token["data"]))
++ comment = self.commentClass(comment_token["data"])
++ root.addprevious(comment._element)
+
+ # Create the root document and add the ElementTree to it
+ self.document = self.documentClass()
+
+From 235a6d7ac7e0a3e2b431766e051094c2d3110ba3 Mon Sep 17 00:00:00 2001
+From: Geoffrey Sneddon <geoffers@gmail.com>
+Date: Mon, 23 Nov 2015 15:42:12 +0000
+Subject: [PATCH 3/3] fixup! Make lxml tree-builder coerce comments to work
+ with lxml 3.5.
+
+---
+ html5lib/ihatexml.py | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/html5lib/ihatexml.py b/html5lib/ihatexml.py
+index 5a81a12..5da5d93 100644
+--- a/html5lib/ihatexml.py
++++ b/html5lib/ihatexml.py
+@@ -226,7 +226,7 @@ def coerceComment(self, data):
+ warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning)
+ data = data.replace("--", "- -")
+ if data.endswith("-"):
+- warnings.warn("Comments cannot contain end in a dash", DataLossWarning)
++ warnings.warn("Comments cannot end in a dash", DataLossWarning)
+ data += " "
+ return data
+
diff --git a/dev-python/html5lib/files/html5lib-0.9999999-python3.6-sanitizer-re.patch b/dev-python/html5lib/files/html5lib-0.9999999-python3.6-sanitizer-re.patch
new file mode 100644
index 000000000000..2fbef2ad0b9e
--- /dev/null
+++ b/dev-python/html5lib/files/html5lib-0.9999999-python3.6-sanitizer-re.patch
@@ -0,0 +1,50 @@
+--- a/html5lib/sanitizer.py
++++ b/html5lib/sanitizer.py
+@@ -203,7 +203,7 @@
+ for attr in self.attr_val_is_uri:
+ if attr not in attrs:
+ continue
+- val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '',
++ val_unescaped = re.sub("[`\x00-\x20\x7f-\xa0\\s]+", '',
+ unescape(attrs[attr])).lower()
+ # remove replacement characters from unescaped characters
+ val_unescaped = val_unescaped.replace("\ufffd", "")
+@@ -228,7 +228,7 @@
+ ' ',
+ unescape(attrs[attr]))
+ if (token["name"] in self.svg_allow_local_href and
+- 'xlink:href' in attrs and re.search('^\s*[^#\s].*',
++ 'xlink:href' in attrs and re.search(r'^\s*[^#\s].*',
+ attrs['xlink:href'])):
+ del attrs['xlink:href']
+ if 'style' in attrs:
+@@ -257,16 +257,16 @@
+
+ def sanitize_css(self, style):
+ # disallow urls
+- style = re.compile('url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style)
++ style = re.compile(r'url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style)
+
+ # gauntlet
+- if not re.match("""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style):
++ if not re.match(r"""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style):
+ return ''
+- if not re.match("^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
++ if not re.match(r"^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
+ return ''
+
+ clean = []
+- for prop, value in re.findall("([-\w]+)\s*:\s*([^:;]*)", style):
++ for prop, value in re.findall(r"([-\w]+)\s*:\s*([^:;]*)", style):
+ if not value:
+ continue
+ if prop.lower() in self.allowed_css_properties:
+@@ -275,7 +275,7 @@
+ 'padding']:
+ for keyword in value.split():
+ if keyword not in self.acceptable_css_keywords and \
+- not re.match("^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword):
++ not re.match(r"^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword):
+ break
+ else:
+ clean.append(prop + ': ' + value + ';')
diff --git a/dev-python/html5lib/html5lib-0.9999999-r2.ebuild b/dev-python/html5lib/html5lib-0.9999999-r2.ebuild
new file mode 100644
index 000000000000..5b23d984fc3e
--- /dev/null
+++ b/dev-python/html5lib/html5lib-0.9999999-r2.ebuild
@@ -0,0 +1,32 @@
+# Copyright 1999-2017 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+
+EAPI=6
+
+PYTHON_COMPAT=( python2_7 python3_{4,5,6} pypy pypy3 )
+PYTHON_REQ_USE="xml(+)"
+
+inherit distutils-r1
+
+DESCRIPTION="HTML parser based on the HTML5 specification"
+HOMEPAGE="https://github.com/html5lib/html5lib-python/ https://html5lib.readthedocs.org"
+SRC_URI="mirror://pypi/${PN:0:1}/${PN}/${P}.tar.gz"
+
+LICENSE="MIT"
+SLOT="0"
+KEYWORDS="~alpha ~amd64 ~arm ~arm64 ~hppa ~ia64 ~mips ~ppc ~ppc64 ~s390 ~sh ~sparc ~x86 ~amd64-fbsd ~x86-fbsd ~amd64-linux ~x86-linux"
+IUSE="test"
+
+RDEPEND="dev-python/six[${PYTHON_USEDEP}]"
+DEPEND="${RDEPEND}
+ dev-python/setuptools[${PYTHON_USEDEP}]
+ test? ( dev-python/nose[${PYTHON_USEDEP}] )"
+
+PATCHES=(
+ "${FILESDIR}"/${P}-lxml-3.5.0-backport.patch
+ "${FILESDIR}"/${P}-python3.6-sanitizer-re.patch
+)
+
+python_test() {
+ nosetests --verbosity=3 || die "Tests fail with ${EPYTHON}"
+}
diff --git a/dev-python/html5lib/html5lib-0.9999999.ebuild b/dev-python/html5lib/html5lib-0.9999999.ebuild
new file mode 100644
index 000000000000..75af49a9d1ce
--- /dev/null
+++ b/dev-python/html5lib/html5lib-0.9999999.ebuild
@@ -0,0 +1,33 @@
+# Copyright 1999-2017 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+
+EAPI=5
+
+PYTHON_COMPAT=( python2_7 python3_{4,5,6} pypy pypy3 )
+PYTHON_REQ_USE="xml(+)"
+
+inherit distutils-r1
+
+DESCRIPTION="HTML parser based on the HTML5 specification"
+HOMEPAGE="https://github.com/html5lib/html5lib-python/ https://html5lib.readthedocs.org"
+SRC_URI="mirror://pypi/${PN:0:1}/${PN}/${P}.tar.gz"
+
+LICENSE="MIT"
+SLOT="0"
+KEYWORDS="alpha amd64 arm arm64 hppa ia64 ~mips ppc ppc64 ~s390 ~sh sparc x86 ~amd64-fbsd ~x86-fbsd ~amd64-linux ~x86-linux"
+IUSE="test"
+
+RDEPEND="dev-python/six[${PYTHON_USEDEP}]"
+DEPEND="${RDEPEND}
+ dev-python/setuptools[${PYTHON_USEDEP}]
+ test? ( dev-python/nose[${PYTHON_USEDEP}] )"
+
+python_test() {
+ # https://github.com/html5lib/html5lib-python/issues/224
+ # https://bugs.gentoo.org/show_bug.cgi?id=571644
+ has_version =dev-python/lxml-3.5.0 && \
+ einfo "test are broken with dev-python/lxml-3.5.0" && \
+ einfo "https://github.com/html5lib/html5lib-python/issues/224" && \
+ return
+ nosetests --verbosity=3 || die "Tests fail with ${EPYTHON}"
+}
diff --git a/dev-python/html5lib/html5lib-0.999999999.ebuild b/dev-python/html5lib/html5lib-0.999999999.ebuild
new file mode 100644
index 000000000000..7e238cc4804a
--- /dev/null
+++ b/dev-python/html5lib/html5lib-0.999999999.ebuild
@@ -0,0 +1,32 @@
+# Copyright 1999-2017 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+
+EAPI=6
+
+PYTHON_COMPAT=( python2_7 python3_{4,5,6} pypy pypy3 )
+PYTHON_REQ_USE="xml(+)"
+
+inherit distutils-r1
+
+DESCRIPTION="HTML parser based on the HTML5 specification"
+HOMEPAGE="https://github.com/html5lib/html5lib-python/ https://html5lib.readthedocs.org"
+SRC_URI="mirror://pypi/${PN:0:1}/${PN}/${P}.tar.gz"
+
+LICENSE="MIT"
+SLOT="0"
+KEYWORDS="~amd64 ~arm ~arm64 ~hppa ~ia64 ~mips ~ppc ~ppc64 ~x86"
+IUSE="test"
+
+RDEPEND="dev-python/six[${PYTHON_USEDEP}]
+ dev-python/webencodings[${PYTHON_USEDEP}]"
+DEPEND="${RDEPEND}
+ dev-python/setuptools[${PYTHON_USEDEP}]
+ test? (
+ dev-python/pytest[${PYTHON_USEDEP}]
+ dev-python/pytest-expect[${PYTHON_USEDEP}]
+ dev-python/mock[${PYTHON_USEDEP}]
+ )"
+
+python_test() {
+ py.test -v || die "Tests fail with ${EPYTHON}"
+}
diff --git a/dev-python/html5lib/metadata.xml b/dev-python/html5lib/metadata.xml
new file mode 100644
index 000000000000..a9695174a877
--- /dev/null
+++ b/dev-python/html5lib/metadata.xml
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE pkgmetadata SYSTEM "http://www.gentoo.org/dtd/metadata.dtd">
+<pkgmetadata>
+ <maintainer type="project">
+ <email>python@gentoo.org</email>
+ <name>Python</name>
+ </maintainer>
+ <longdescription>
+ html5lib is a pure-python library for parsing HTML. It is designed
+ to conform to the WHATWG HTML specification, as is implemented by
+ all major web browsers.
+</longdescription>
+ <upstream>
+ <remote-id type="pypi">html5lib</remote-id>
+ <remote-id type="github">html5lib/html5lib-python</remote-id>
+ </upstream>
+</pkgmetadata>