summary refs log tree commit diff
path: root/dev-python/cchardet
diff options
context:
space:
mode:
Diffstat (limited to 'dev-python/cchardet')
-rw-r--r-- dev-python/cchardet/Manifest 2
-rw-r--r-- dev-python/cchardet/cchardet-2.1.7-r1.ebuild 30
-rw-r--r-- dev-python/cchardet/files/cchardet-2.1.7-pytest.patch 120
3 files changed, 152 insertions, 0 deletions
diff --git a/dev-python/cchardet/Manifest b/dev-python/cchardet/Manifest
index 96c4640c7f6b..d5730bb6c0c5 100644
--- a/dev-python/cchardet/Manifest
+++ b/dev-python/cchardet/Manifest
@@ -1,3 +1,5 @@
+AUX cchardet-2.1.7-pytest.patch 3529 BLAKE2B 2ce0bb1a7ee281477f471dad08b1e7819b9d47b96db2c4321b67185dd860de5f4e94150846e76ab8364bee079f823884af366256c8ad98ef0797987d2cab9a0c SHA512 0e7d126a49c3c1d2c810085aa1dd6fa1b1b38807876d168d3d459ea837bb8e146e0585c93a10599713a5511a76c328696ee0889d4c9a63ef8aee4d97b5df686a
DIST cchardet-2.1.7.tar.gz 653617 BLAKE2B 0ca9becac01c67da191290c7de0dc52d5c8e6c2715f660811c8e67d9a06e74ac155a081de81af96ade74ccc4065093fc226f232a26f66236fafe9fc1b48a9c9e SHA512 43e663e30ec079b2a954862de5e8136a2e40f69e300d65eb4ce9d7ffa5d8c496dc7c0937b3306b4096cfad12a1d0617628f8f0115534ab6faf9eb39d2b3935a2
+EBUILD cchardet-2.1.7-r1.ebuild 632 BLAKE2B 910cdcde8902d74a17609ca8f768db9a0b33093313ae9589e9bae45b4c52d84856075f0c24825e0c45910e08bc01f7f2d369edefed2fac5820c31558ba6edce3 SHA512 451dbdb2b50d21d5172090b8043b4240296a03bbb3ab23badde91945486d6ca270acffc6dcf7eae5eb43d9bac60468605b357b12170bf63576f090031f521e5d
EBUILD cchardet-2.1.7.ebuild 646 BLAKE2B 8648dd6bcb94fcfa5245df1c5cbe41e13844b44adf882c3bf27f5a65cd5cf2c88145a3ed032b276bccf8162bb15c66b527c6c6bcc7447303eefa63f178f5b207 SHA512 38da8f1dee07329aabc28efae0911ed6769c02e89e8a0facdb9dc6eed399a3b2988d3bd181f5d72660e13fb6164df927eed7e9857e861bbe7b60457d3188703e
MISC metadata.xml 346 BLAKE2B 86fd035032c4a4223ca8b8b7db0b3593b60676376b17ebc0c2a02ccce6d4dd22f7bcc80350634378bfd9afb44640a4f8798dad5f479024510c90b0a2f9425eb9 SHA512 f33d9efa195ef267c11565d657fdbfdc6b11af571b812e7ce9885b41ab7e48c4e58422618ebf6809d7e1adab00977996d86ac7fe4f47b0db4267a34d16607805
diff --git a/dev-python/cchardet/cchardet-2.1.7-r1.ebuild b/dev-python/cchardet/cchardet-2.1.7-r1.ebuild
new file mode 100644
index 000000000000..4f344e9f6e57
--- /dev/null
+++ b/dev-python/cchardet/cchardet-2.1.7-r1.ebuild
@@ -0,0 +1,30 @@
+# Copyright 2021-2022 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
+
+EAPI=8
+
+DISTUTILS_USE_PEP517=setuptools
+PYTHON_COMPAT=( python3_{8..11} )
+inherit distutils-r1
+
+DESCRIPTION="High speed universal character encoding detector"
+HOMEPAGE="
+ https://github.com/PyYoshi/cChardet
+ https://pypi.org/project/cchardet/
+"
+SRC_URI="mirror://pypi/${PN:0:1}/${PN}/${P}.tar.gz"  # ${PN:0:1} = first character of the package name
+
+LICENSE="MPL-1.1"
+SLOT="0"
+KEYWORDS="~amd64 ~arm ~x86"
+
+BDEPEND="
+ dev-python/cython[${PYTHON_USEDEP}]
+"
+
+PATCHES=(
+ # https://github.com/PyYoshi/cChardet/pull/78
+ "${FILESDIR}/${P}-pytest.patch"
+)
+
+distutils_enable_tests pytest  # the patch in PATCHES adds src/tests/cchardet_test.py
diff --git a/dev-python/cchardet/files/cchardet-2.1.7-pytest.patch b/dev-python/cchardet/files/cchardet-2.1.7-pytest.patch
new file mode 100644
index 000000000000..11f38579c184
--- /dev/null
+++ b/dev-python/cchardet/files/cchardet-2.1.7-pytest.patch
@@ -0,0 +1,120 @@
+https://github.com/PyYoshi/cChardet/pull/78
+
+From: q0w <43147888+q0w@users.noreply.github.com>
+Date: Wed, 17 Nov 2021 14:50:41 +0300
+Subject: [PATCH 02/13] Use pytest
+
+--- /dev/null
++++ b/src/tests/cchardet_test.py
+@@ -0,0 +1,111 @@
++import glob
++import os
++
++import cchardet
++
++SKIP_LIST = [
++    'src/tests/testdata/ja/utf-16le.txt',
++    'src/tests/testdata/ja/utf-16be.txt',
++    'src/tests/testdata/es/iso-8859-15.txt',
++    'src/tests/testdata/da/iso-8859-1.txt',
++    'src/tests/testdata/he/iso-8859-8.txt',
++]
++
++# test_decode() skips these two because Python can't decode the
++# encoding, and additionally skips everything in SKIP_LIST.
++SKIP_LIST_02 = [
++    'src/tests/testdata/vi/viscii.txt',
++    'src/tests/testdata/zh/euc-tw.txt',
++] + SKIP_LIST
++
++
++def test_ascii():
++    """Plain lowercase ASCII letters must be detected as 'ascii'."""
++    assert cchardet.detect(b'abcdefghijklmnopqrstuvwxyz')['encoding'].lower() == 'ascii'
++
++
++def test_detect():
++    """Each fixture's detected charset must equal its file name stem."""
++    testfiles = glob.glob('src/tests/testdata/*/*.txt')
++    assert testfiles, 'no fixtures matched src/tests/testdata/*/*.txt (wrong cwd?)'
++    for testfile in testfiles:
++        if testfile.replace("\\", "/") in SKIP_LIST:
++            continue
++        expected = os.path.splitext(os.path.basename(testfile))[0].lower()
++        with open(testfile, 'rb') as f:
++            detected = cchardet.detect(f.read())['encoding']
++        # Name the fixture: a bare comparison does not say which file failed.
++        assert detected and detected.lower() == expected, (testfile, detected)
++
++
++def test_detector():
++    """Feed the Shift_JIS sample line by line; the result must be 'shift_jis'."""
++    detector = cchardet.UniversalDetector()
++    with open("src/tests/samples/wikipediaJa_One_Thousand_and_One_Nights_SJIS.txt", 'rb') as f:
++        # iter() with a b'' sentinel stops at end of file, like `while line:`.
++        for chunk in iter(f.readline, b''):
++            detector.feed(chunk)
++            # Stop reading as soon as the detector reports it is done.
++            if detector.done:
++                break
++    detector.close()
++    assert "shift_jis" == detector.result['encoding'].lower()
++
++
++def test_github_issue_20():
++    """
++    Pass the single byte 0x8f to both APIs; only checks that neither raises.
++    https://github.com/PyYoshi/cChardet/issues/20
++    """
++    payload = b'\x8f'
++    cchardet.detect(payload)
++
++    streaming = cchardet.UniversalDetector()
++    streaming.feed(payload)
++    streaming.close()
++
++
++def test_decode():
++    """Each fixture not in SKIP_LIST_02 must decode with its detected charset."""
++    testfiles = glob.glob('src/tests/testdata/*/*.txt')
++    assert testfiles, 'no fixtures matched src/tests/testdata/*/*.txt (wrong cwd?)'
++    for testfile in testfiles:
++        if testfile.replace("\\", "/") in SKIP_LIST_02:
++            continue
++
++        with open(testfile, 'rb') as f:
++            msg = f.read()
++        encoding = cchardet.detect(msg)["encoding"]
++        try:
++            msg.decode(encoding)
++        except LookupError:
++            # Say which fixture produced the unknown codec, then re-raise.
++            print("LookupError: { file=%s, encoding=%s }" % (testfile, encoding))
++            raise
++
++
++def test_utf8_with_bom():
++    """A bare UTF-8 byte order mark must be detected as 'utf-8-sig'."""
++    bom_only = b'\xEF\xBB\xBF'
++    assert cchardet.detect(bom_only)['encoding'].lower() == "utf-8-sig"
++
++
++def test_null_bytes():
++    """Input mixing a NUL byte with high bytes must yield no encoding (None)."""
++    data = b'ABC\x00\x80\x81'
++    result = cchardet.detect(data)
++    assert result['encoding'] is None
++
++# def test_iso8859_2_csv(self):
++# testfile = 'tests/samples/iso8859-2.csv'
++# with open(testfile, 'rb') as f:
++# msg = f.read()
++# detected_encoding = cchardet.detect(msg)
++# eq_(
++# "iso8859-2",
++# detected_encoding['encoding'].lower(),
++# 'Expected %s, but got %s' % (
++# "iso8859-2",
++# detected_encoding['encoding'].lower()
++# )
++# )