summaryrefslogtreecommitdiff
path: root/sci-ml/datasets
diff options
context:
space:
mode:
Diffstat (limited to 'sci-ml/datasets')
-rw-r--r--sci-ml/datasets/Manifest8
-rw-r--r--sci-ml/datasets/datasets-3.5.0.ebuild (renamed from sci-ml/datasets/datasets-3.0.2.ebuild)75
-rw-r--r--sci-ml/datasets/files/datasets-2.21.0-tests.patch22
-rw-r--r--sci-ml/datasets/files/datasets-3.0.2-tests.patch10
-rw-r--r--sci-ml/datasets/metadata.xml4
5 files changed, 63 insertions, 56 deletions
diff --git a/sci-ml/datasets/Manifest b/sci-ml/datasets/Manifest
index 86d43b89b744..1139c6f41dad 100644
--- a/sci-ml/datasets/Manifest
+++ b/sci-ml/datasets/Manifest
@@ -1,5 +1,3 @@
-AUX datasets-2.21.0-tests.patch 721 BLAKE2B cf85b02a4eb5da27e52bf819f00e333d6e3fb3fb0b9f7279afff24e3fddf76fcd46cb5e06d29b4780d0ce6b48c9378df301c70f6339e2f2898560219ebab4210 SHA512 43cdc5af6ba3fff3e7641006be6d8e10a5ad00136c1cf137628456019cccbd3d05b888b7eeec1cb8cd42d5ce52091b2a66d32a5b41ff2193658c4257900ea45c
-AUX datasets-3.0.2-tests.patch 335 BLAKE2B b7499d85cc8cf09a2cb9b58aa0c7ab16c1bc700812c767465979cc59b11d63d05f52422ae731317f373314164715079f6fa81b30d47e3058ce88d6539d049789 SHA512 3f533ab765362ea6a1bc7a1766d332f5bc53e342cbb684061eaed58ef415f715c003f210b73a8b0ca20245d8e1f60517191829fb64feb53101211a9f81542e5a
-DIST datasets-3.0.2.gh.tar.gz 1867037 BLAKE2B 92475ac566bd825c542dbd3e138c83e1733e6d1137f30a2cfd8f82f7dd54ed78c0566aa766182625e8787413be18f5a0c0c0a289b5823fa9112dcb2876074b7e SHA512 aa55dd2f53df7217c30999f759c76a13d78851233bce4df9b1ad85081e101a7280e905822160874d24d15ea1e1a5ecee1bae9cc66785e89af45ad16dd0339595
-EBUILD datasets-3.0.2.ebuild 2612 BLAKE2B cc533351d7685f3e40cd458c324c3a29c51e3cfe7ed0258962793a4d6c4a14c34a12dc142292a0c09f801afeb5125617a2d52d2f41ac36c824fcba72ab09c5d3 SHA512 5d9da157f3e4747c6e21a32678998b64acb364dc5db32dab43a262633f2b6030d762bfbeca044647ef74946dce9588149f6075f28d3072e270621cf49b432221
-MISC metadata.xml 379 BLAKE2B 48ebb9e7bfa8b58b0d15b82c4146def465e08cf3212ab4af04129d09c153b67b00d0fa05b94d6af54f643ec3a202f2335d3254b966f49d1394d3c7b9e5da56a5 SHA512 99560decfaa0e438980f372d99257695e9ca9585167d9aba091e0b775c2f8384657ddc017841c8f06f8b568017a54fb9e31da736f3c875da717e154cdce876d1
+DIST datasets-3.5.0.gh.tar.gz 1906212 BLAKE2B 2c26b907230eb9256a6056878dc80eae3296869f24597298f262d49a5fc0a31dbcd6ebb8dfaca9cf27faece019a4f39ab0dd07db772659d414e1d3f5ecfe2258 SHA512 ca053187b6b80140136991e5b220136cf22f3cd98d073323e0a72d77211248d52986aa49b0e595759ba023b4551a9dc01ab1cac853c855f032acbd9a023ecd5a
+EBUILD datasets-3.5.0.ebuild 4237 BLAKE2B eac02108ad27e8bf772b9ba9531908a980df78288d73a35cfebc91ff2c8bace552fc5f3146257fa70766ea9078e1140a737361eea691a78c58521d3a5e79eb2e SHA512 e29c8387ce8bb2823bfd20a426ac5773921a9a25f822f14fee9ab159ecb70540af1d12b8b27755d6c077712a158ebc74e08b09588da196afd0a8b668c92cc630
+MISC metadata.xml 478 BLAKE2B 8f4309c23cc1b048838c0120e6be9e83d7e796eced3488a32d791896d446f8c27b2e53cbdb04f23e5bf778f9d38e112267f1bae87669e39f3349fd4639fe0e8b SHA512 1583735215d169a07f78e74ad60061ccb49c308e02745e4400dd92ded96d2818b315c8578735ef2166f11c6bd5a240d62021bfa3a82bbd9670a7c3ad9743c78b
diff --git a/sci-ml/datasets/datasets-3.0.2.ebuild b/sci-ml/datasets/datasets-3.5.0.ebuild
index e6b81257061a..98f3234c789b 100644
--- a/sci-ml/datasets/datasets-3.0.2.ebuild
+++ b/sci-ml/datasets/datasets-3.5.0.ebuild
@@ -4,7 +4,7 @@
EAPI=8
DISTUTILS_USE_PEP517=setuptools
-PYTHON_COMPAT=( python3_{10..13} )
+PYTHON_COMPAT=( python3_{11..13} )
DISTUTILS_SINGLE_IMPL=1
inherit distutils-r1
@@ -16,12 +16,11 @@ SRC_URI="https://github.com/huggingface/${PN}/archive/refs/tags/${PV}.tar.gz
LICENSE="Apache-2.0"
SLOT="0"
KEYWORDS="~amd64"
+IUSE="torch vision"
+REQUIRED_USE="test? ( torch vision )"
RDEPEND="
- ${PYTHON_DEPS}
- sci-ml/caffe2[${PYTHON_SINGLE_USEDEP},numpy]
sci-ml/huggingface_hub[${PYTHON_SINGLE_USEDEP}]
- sci-ml/pytorch[${PYTHON_SINGLE_USEDEP}]
$(python_gen_cond_dep '
dev-python/aiohttp[${PYTHON_USEDEP}]
dev-python/dill[${PYTHON_USEDEP}]
@@ -36,46 +35,68 @@ RDEPEND="
dev-python/requests[${PYTHON_USEDEP}]
dev-python/tqdm[${PYTHON_USEDEP}]
dev-python/xxhash[${PYTHON_USEDEP}]
+ vision? (
+ dev-python/pillow[${PYTHON_USEDEP}]
+ )
')
+ torch? (
+ sci-ml/caffe2[${PYTHON_SINGLE_USEDEP},numpy]
+ sci-ml/pytorch[${PYTHON_SINGLE_USEDEP}]
+ )
"
DEPEND="${RDEPEND}"
+# Missing for tests:
+# joblib
+# joblibspark
+# faiss-cpu
+# jax
+# jaxlib
+# polars
+# pyav
+# pyspark
+# py7zr
+# s3fs
+# tensorflow
+# tiktoken
+# torchdata
+# transformers
BDEPEND="test? (
sci-ml/torchvision[${PYTHON_SINGLE_USEDEP}]
$(python_gen_cond_dep '
dev-python/absl-py[${PYTHON_USEDEP}]
dev-python/decorator[${PYTHON_USEDEP}]
+ dev-python/elasticsearch[${PYTHON_USEDEP}]
+ dev-python/lz4[${PYTHON_USEDEP}]
+ dev-python/moto[${PYTHON_USEDEP}]
+ dev-python/protobuf:=[${PYTHON_USEDEP}]
dev-python/pytest-datadir[${PYTHON_USEDEP}]
- dev-python/scikit-learn[${PYTHON_USEDEP}]
+ dev-python/pytest-xdist[${PYTHON_USEDEP}]
+ dev-python/soundfile[${PYTHON_USEDEP}]
dev-python/sqlalchemy[${PYTHON_USEDEP}]
dev-python/zstandard[${PYTHON_USEDEP}]
- sci-ml/jiwer[${PYTHON_USEDEP}]
- sci-ml/seqeval[${PYTHON_USEDEP}]
')
)"
-PATCHES=(
- "${FILESDIR}"/${P}-tests.patch
-)
-
distutils_enable_tests pytest
-src_prepare() {
- distutils-r1_src_prepare
- sed -i -e \
- "/pyarrow_hotfix/d" \
- src/datasets/features/features.py || die
-}
-
src_test() {
local EPYTEST_IGNORE=(
tests/features/test_audio.py
- tests/test_fingerprint.py
tests/packaged_modules/test_audiofolder.py
tests/packaged_modules/test_spark.py
+ tests/test_fingerprint.py
tests/test_iterable_dataset.py
+ tests/test_inspect.py
+ tests/test_load.py
+ tests/test_upstream_hub.py
)
local EPYTEST_DESELECT=(
+ tests/commands/test_test.py::test_test_command
+ tests/features/test_video.py::test_video_feature_encode_example
+ tests/features/test_video.py::test_dataset_with_video_feature
+ tests/features/test_video.py::test_dataset_with_video_map_and_formatted
+ tests/io/test_parquet.py::test_parquet_read_geoparquet
tests/packaged_modules/test_cache.py::test_cache_multi_configs
tests/packaged_modules/test_cache.py::test_cache_single_config
tests/test_arrow_dataset.py::BaseDatasetTest::test_filter_caching_on_disk
@@ -83,7 +104,23 @@ src_test() {
tests/test_distributed.py::test_torch_distributed_run
tests/test_file_utils.py::TestxPath::test_xpath_rglob
tests/test_file_utils.py::TestxPath::test_xpath_glob
+ tests/test_file_utils.py::test_xexists_private
+ tests/test_file_utils.py::test_xlistdir_private
+ tests/test_file_utils.py::test_xisdir_private
+ tests/test_file_utils.py::test_xisfile_private
+ tests/test_file_utils.py::test_xgetsize_private
+ tests/test_file_utils.py::test_xglob_private
+ tests/test_file_utils.py::test_xwalk_private
tests/test_hub.py::test_convert_to_parquet
+ tests/packaged_modules/test_cache.py::test_cache_capital_letters
+ tests/packaged_modules/test_folder_based_builder.py::test_data_files_with_different_levels_no_metadata
+ tests/packaged_modules/test_folder_based_builder.py::test_data_files_with_one_label_no_metadata
+ tests/test_data_files.py::test_DataFilesList_from_patterns_locally_with_extra_files
+ tests/test_data_files.py::test_DataFilesDict_from_patterns_locally_or_remote_hashing
+ tests/test_file_utils.py::test_xopen_remote
+ tests/test_hub.py::test_delete_from_hub
+ tests/test_offline_util.py::test_offline_with_timeout
+ tests/test_search.py::ElasticSearchIndexTest::test_elasticsearch
)
distutils-r1_src_test
}
diff --git a/sci-ml/datasets/files/datasets-2.21.0-tests.patch b/sci-ml/datasets/files/datasets-2.21.0-tests.patch
deleted file mode 100644
index 0a55459db109..000000000000
--- a/sci-ml/datasets/files/datasets-2.21.0-tests.patch
+++ /dev/null
@@ -1,22 +0,0 @@
---- a/tests/test_arrow_dataset.py 2024-02-20 21:53:24.248470991 +0100
-+++ b/tests/test_arrow_dataset.py 2024-02-20 21:53:29.441804737 +0100
-@@ -4131,7 +4131,6 @@
- [
- "relative/path",
- "/absolute/path",
-- "s3://bucket/relative/path",
- "hdfs://relative/path",
- "hdfs:///absolute/path",
- ],
---- a/tests/packaged_modules/test_audiofolder.py 2023-05-06 14:00:39.560876163 +0200
-+++ b/tests/packaged_modules/test_audiofolder.py 2023-05-06 14:01:26.005212423 +0200
-@@ -1,9 +1,8 @@
- import shutil
- import textwrap
-
- import numpy as np
- import pytest
--import soundfile as sf
-
- from datasets import Audio, ClassLabel, Features, Value
- from datasets.builder import InvalidConfigName
diff --git a/sci-ml/datasets/files/datasets-3.0.2-tests.patch b/sci-ml/datasets/files/datasets-3.0.2-tests.patch
deleted file mode 100644
index 5ff3d6c85c57..000000000000
--- a/sci-ml/datasets/files/datasets-3.0.2-tests.patch
+++ /dev/null
@@ -1,10 +0,0 @@
---- a/tests/test_arrow_dataset.py 2024-02-20 21:53:24.248470991 +0100
-+++ b/tests/test_arrow_dataset.py 2024-02-20 21:53:29.441804737 +0100
-@@ -4131,7 +4131,6 @@
- [
- "relative/path",
- "/absolute/path",
-- "s3://bucket/relative/path",
- "hdfs://relative/path",
- "hdfs:///absolute/path",
- ],
diff --git a/sci-ml/datasets/metadata.xml b/sci-ml/datasets/metadata.xml
index 94c112402049..f7e5d145210a 100644
--- a/sci-ml/datasets/metadata.xml
+++ b/sci-ml/datasets/metadata.xml
@@ -5,6 +5,10 @@
<email>tupone@gentoo.org</email>
<name>Tupone Alfredo</name>
</maintainer>
+ <use>
+ <flag name="torch">Use pytorch</flag>
+ <flag name="vision">Support vision</flag>
+ </use>
<upstream>
<remote-id type="github">huggingface/datasets</remote-id>
<remote-id type="pypi">datasets</remote-id>