From 7fa8a1e3f149eea4c844c47cfe26136ed7614c58 Mon Sep 17 00:00:00 2001 From: V3n3RiX Date: Mon, 15 Apr 2024 12:04:03 +0100 Subject: gentoo auto-resync : 15:04:2024 - 12:04:02 --- sci-libs/tokenizers/Manifest | 3 +- .../tokenizers/files/tokenizers-0.15.2-test.patch | 39 +++ sci-libs/tokenizers/tokenizers-0.15.2-r1.ebuild | 344 +++++++++++++++++++++ sci-libs/tokenizers/tokenizers-0.15.2.ebuild | 339 -------------------- 4 files changed, 385 insertions(+), 340 deletions(-) create mode 100644 sci-libs/tokenizers/files/tokenizers-0.15.2-test.patch create mode 100644 sci-libs/tokenizers/tokenizers-0.15.2-r1.ebuild delete mode 100644 sci-libs/tokenizers/tokenizers-0.15.2.ebuild (limited to 'sci-libs/tokenizers') diff --git a/sci-libs/tokenizers/Manifest b/sci-libs/tokenizers/Manifest index 9db96f16d850..80a037ac892f 100644 --- a/sci-libs/tokenizers/Manifest +++ b/sci-libs/tokenizers/Manifest @@ -1,3 +1,4 @@ +AUX tokenizers-0.15.2-test.patch 1783 BLAKE2B eb91905fb1773a57b7553d9d95ce326da6ed12729d1f4111504b58d8e825d8cfbe3303e0663703f8620e72c91e70573fd7b6fdf021a165c44a04d0a84e87622f SHA512 763276325c87a4fcca24e482726f50a7db9640b436314ac3a7f47fb868502360ab0c65eecafe53ee12f4a94c3a2c56b69942ceaf636479eb005e756d8cf8c1b1 DIST adler-1.0.2.crate 12778 BLAKE2B a1dc17786adae945ac09d3525e609ed944e6465690787bbb831a1b9d53793cba1989793d0a5606d5d23ee20d36457923d451b1b3530c9ec7072a487aa3e55bbd SHA512 7ab190d31890fc05b0b55d8e2c6527a505e06793d5496be0b3831e0513412f9ba97f8148f6f68ed0770fa9cd980a5092d885e058becf1d5506b7c74b82674aa1 DIST aho-corasick-1.1.1.crate 182812 BLAKE2B df74c2cfa0ae392a8d466e370ba761c4cd37c65773affba9a3cdcf7f5797b34b4a25e1646be3de5081644b34db2dce273609edb4f18a1ce7fdbf31ac28a10b88 SHA512 a894e1cefbb63a3b7b78a3676874d0b7a507c27970f48cdfbda1e5deefbf6b20ed4271b660a12eea77b318cd2fa0f80850a0b1ddfe0d0731ffa326c0fc295692 DIST aho-corasick-1.1.2.crate 183136 BLAKE2B 2d4306d8968061b9f7e50190be6a92b3f668169ba1b9f9691de08a57c96185f7a4288d20c64cb8488a260eb18d3ed4b0e8358b0cca47aa44759b2e448049cbaa SHA512 61ef5092673ab5a60bec4e92df28a91fe6171ba59d5829ffe41fc55aff3bfb755533a4ad53dc7bf827a0b789fcce593b17e69d1fcfb3694f06ed3b1bd535d40c @@ -419,5 +420,5 @@ DIST windows_x86_64_msvc-0.52.0.crate 821600 BLAKE2B cc448b65f98fc0fc4949ae622b7 DIST windows_x86_64_msvc-0.52.4.crate 828019 BLAKE2B 08163b63d934114457cd64b1c372f8a0cfc1ebf48a2efb41d79031c58ea64e023acd32d2f5075b8b78536998188138562e584ece95f2021b4bc71087ac45f026 SHA512 0671fa3c0463c6d65b525ece8bc91eab2f75cb534de86ba2b1e854d4136fcb439717441881206dba7cfb602493bc24d2aefa96abf8977f5a0fe38d41eadc90f1 DIST zeroize-1.7.0.crate 19039 BLAKE2B 2f94a5025f409bd2b96a456d2f78a34c6b05b5554abe7ef3fad2a55a8fcff8a6a1b971be660aa4c2954ab7d6e89bebc431036e349edef74711292f9f64b1dbae SHA512 9d31e3e76e8c861309a3579c21f6da5fd6b056c7d7a350427445a1a832e8827204804783f7f9b808acaa2148efef883d9078bf84943b1db55526bba5bf5a2756 EBUILD tokenizers-0.14.1-r1.ebuild 6416 BLAKE2B a05371056932d5edc2b62b29de712c9d388391071a239114a5543ef583d52ca4e99e89e7411c872328c42de19e098dd02c4bf29992cfa97cb6683dc5b442ad0d SHA512 aafc4c2bdf01f6224d8393ba5d31bb3416b9917651931d06593ba395620de99f2fe553502c1b8e5653e81164dcdc863f13b0c4924f5465f6fd72ca239a96a4f3 -EBUILD tokenizers-0.15.2.ebuild 6487 BLAKE2B b5d5fcb2c491ad0e16b2052605118597e83646151f6f8f785915ee3a0a2e6780e645ab8d8ae3bc122de25938d3a442e26d708e9150ac15cf2fc8560ba08f293c SHA512 4f1f8345adc0ba1e93a7d7ac025b56ad4248b2deece840eae2ebec9cb8d17a28c811ceed4a53ec5990857e78ccffb300eb41b903094a5aba973a766376ebb7f9 +EBUILD tokenizers-0.15.2-r1.ebuild 6597 BLAKE2B f4e30a0b23e8dcb051b9e0c8de5547435277d5bdb882f436b1a38b00864b75f6dc84f7fb827aae20a1af4da8e36cf76ef4726365984b8240b419f3404c12e0f5 SHA512 3817ab6a8052723bf71483f14a1da578d233cd97a2088ffa01821fade6d7f313f185edd2cb1345f58b6a00fc6682bf2de6176892c03d2131e114c9a6a43308ce MISC metadata.xml 335 BLAKE2B 55e4d7b301e4315210bf24a4056741cc923c6a72ae323158c5715fade20db86de8a03464bf8f12fe7116689b41aa0519a0070eebff5f63801e23a9c4a9698a92 SHA512 77f537f495b90ad031842f704c5a629cc995fcd848f11defeb597cc16d2d7bab07d262f05a50b07788d2f58fb60eacbefdfa8328215b89b0c661a05b18c4555d diff --git a/sci-libs/tokenizers/files/tokenizers-0.15.2-test.patch b/sci-libs/tokenizers/files/tokenizers-0.15.2-test.patch new file mode 100644 index 000000000000..01a872cb846a --- /dev/null +++ b/sci-libs/tokenizers/files/tokenizers-0.15.2-test.patch @@ -0,0 +1,39 @@ +--- a/tests/bindings/test_trainers.py 2024-04-07 18:21:19.443506351 +0200 ++++ b/tests/bindings/test_trainers.py 2024-04-07 18:21:54.893466083 +0200 +@@ -295,8 +295,8 @@ + tokenizer.pre_tokenizer = pre_tokenizers.Sequence( + [pre_tokenizers.Whitespace(), pre_tokenizers.Digits(individual_digits=True)] + ) +- tokenizer.train(files=["data/big.txt"], trainer=trainer) ++ tokenizer.train(files=["tests/data/big.txt"], trainer=trainer) + +- tokenizer.save("data/tokenizer.json") ++ tokenizer.save("tests/data/tokenizer.json") + +- tokenizer.from_file("data/tokenizer.json") ++ tokenizer.from_file("tests/data/tokenizer.json") +--- a/tests/documentation/test_tutorial_train_from_iterators.py 2024-04-07 18:19:08.653593406 +0200 ++++ b/tests/documentation/test_tutorial_train_from_iterators.py 2024-04-07 18:19:39.206906910 +0200 +@@ -40,7 +40,7 @@ + def setup_gzip_files(self, train_files): + with open(train_files["small"], "rt") as small: + for n in range(3): +- path = f"data/my-file.{n}.gz" ++ path = f"tests/data/my-file.{n}.gz" + with gzip.open(path, "wt") as f: + f.write(small.read()) + +@@ -87,11 +87,11 @@ + # START single_gzip + import gzip + +- with gzip.open("data/my-file.0.gz", "rt") as f: ++ with gzip.open("tests/data/my-file.0.gz", "rt") as f: + tokenizer.train_from_iterator(f, trainer=trainer) + # END single_gzip + # START multi_gzip +- files = ["data/my-file.0.gz", "data/my-file.1.gz", "data/my-file.2.gz"] ++ files = ["tests/data/my-file.0.gz", "tests/data/my-file.1.gz", "tests/data/my-file.2.gz"] + + def gzip_iterator(): + for path in files: diff --git a/sci-libs/tokenizers/tokenizers-0.15.2-r1.ebuild b/sci-libs/tokenizers/tokenizers-0.15.2-r1.ebuild new file mode 100644 index 000000000000..ed6b224ac702 --- /dev/null +++ b/sci-libs/tokenizers/tokenizers-0.15.2-r1.ebuild @@ -0,0 +1,344 @@ +# Copyright 2023-2024 Gentoo Authors +# Distributed under the terms of the GNU General Public License v2 + +# Autogenerated by pycargoebuild 0.10 + +EAPI=8 + +DISTUTILS_USE_PEP517=maturin +PYTHON_COMPAT=( python3_{10..12} ) +DISTUTILS_EXT=1 +DISTUTILS_SINGLE_IMPL=1 + +CRATES=" + adler@1.0.2 + aho-corasick@1.1.2 + aho-corasick@1.1.3 + anes@0.1.6 + anstream@0.6.11 + anstream@0.6.13 + anstyle-parse@0.2.3 + anstyle-query@1.0.2 + anstyle-wincon@3.0.2 + anstyle@1.0.6 + assert_approx_eq@1.1.0 + autocfg@1.1.0 + autocfg@1.2.0 + base64@0.13.1 + base64@0.21.7 + bit-set@0.5.3 + bit-vec@0.6.3 + bitflags@1.3.2 + bitflags@2.4.2 + bitflags@2.5.0 + bumpalo@3.15.4 + cast@0.3.0 + cc@1.0.83 + cc@1.0.90 + cfg-if@1.0.0 + ciborium-io@0.2.2 + ciborium-ll@0.2.2 + ciborium@0.2.2 + clap@4.5.0 + clap@4.5.4 + clap_builder@4.5.0 + clap_builder@4.5.2 + clap_derive@4.5.0 + clap_derive@4.5.4 + clap_lex@0.7.0 + colorchoice@1.0.0 + console@0.15.8 + core-foundation-sys@0.8.6 + core-foundation@0.9.4 + crc32fast@1.4.0 + criterion-plot@0.5.0 + criterion@0.5.1 + crossbeam-deque@0.8.5 + crossbeam-epoch@0.9.18 + crossbeam-utils@0.8.19 + crunchy@0.2.2 + darling@0.14.4 + darling_core@0.14.4 + darling_macro@0.14.4 + derive_builder@0.12.0 + derive_builder_core@0.12.0 + derive_builder_macro@0.12.0 + dirs-sys@0.4.1 + dirs@5.0.1 + either@1.10.0 + encode_unicode@0.3.6 + env_logger@0.10.2 + errno@0.3.8 + esaxx-rs@0.1.10 + fancy-regex@0.13.0 + fastrand@2.0.1 + fastrand@2.0.2 + flate2@1.0.28 + fnv@1.0.7 + foreign-types-shared@0.1.1 + foreign-types@0.3.2 + form_urlencoded@1.2.1 + getrandom@0.2.12 + half@2.4.0 + heck@0.4.1 + heck@0.5.0 + hermit-abi@0.3.5 + hermit-abi@0.3.9 + hf-hub@0.3.2 + humantime@2.1.0 + ident_case@1.0.1 + idna@0.5.0 + indicatif@0.17.8 + indoc@2.0.4 + indoc@2.0.5 + instant@0.1.12 + is-terminal@0.4.12 + itertools@0.10.5 + itertools@0.11.0 + itertools@0.12.1 + itoa@1.0.10 + itoa@1.0.11 + js-sys@0.3.69 + lazy_static@1.4.0 + libc@0.2.153 + libredox@0.1.3 + linux-raw-sys@0.4.13 + lock_api@0.4.11 + log@0.4.20 + log@0.4.21 + macro_rules_attribute-proc_macro@0.2.0 + macro_rules_attribute@0.2.0 + matrixmultiply@0.3.8 + memchr@2.7.1 + memchr@2.7.2 + memoffset@0.9.0 + memoffset@0.9.1 + minimal-lexical@0.2.1 + miniz_oxide@0.7.2 + monostate-impl@0.1.11 + monostate@0.1.11 + native-tls@0.2.11 + ndarray@0.15.6 + nom@7.1.3 + num-complex@0.4.5 + num-integer@0.1.46 + num-traits@0.2.18 + number_prefix@0.4.0 + numpy@0.20.0 + once_cell@1.19.0 + onig@6.4.0 + onig_sys@69.8.1 + oorandom@11.1.3 + openssl-macros@0.1.1 + openssl-probe@0.1.5 + openssl-sys@0.9.102 + openssl@0.10.64 + option-ext@0.2.0 + parking_lot@0.12.1 + parking_lot_core@0.9.9 + paste@1.0.14 + percent-encoding@2.3.1 + pkg-config@0.3.29 + pkg-config@0.3.30 + plotters-backend@0.3.5 + plotters-svg@0.3.5 + plotters@0.3.5 + portable-atomic@1.6.0 + ppv-lite86@0.2.17 + proc-macro2@1.0.78 + proc-macro2@1.0.79 + pyo3-build-config@0.20.2 + pyo3-build-config@0.20.3 + pyo3-ffi@0.20.2 + pyo3-ffi@0.20.3 + pyo3-macros-backend@0.20.2 + pyo3-macros-backend@0.20.3 + pyo3-macros@0.20.2 + pyo3-macros@0.20.3 + pyo3@0.20.2 + pyo3@0.20.3 + quote@1.0.35 + rand@0.8.5 + rand_chacha@0.3.1 + rand_core@0.6.4 + rawpointer@0.2.1 + rayon-cond@0.3.0 + rayon-core@1.12.1 + rayon@1.8.1 + rayon@1.10.0 + redox_syscall@0.4.1 + regex-automata@0.4.5 + regex-syntax@0.8.2 + redox_users@0.4.5 + regex-automata@0.4.6 + regex-syntax@0.8.3 + regex@1.10.3 + regex@1.10.4 + ring@0.17.8 + rustc-hash@1.1.0 + rustix@0.38.31 + rustix@0.38.32 + rustls-pki-types@1.4.1 + rustls-webpki@0.102.2 + rustls@0.22.3 + ryu@1.0.16 + ryu@1.0.17 + scopeguard@1.2.0 + same-file@1.0.6 + schannel@0.1.23 + scopeguard@1.2.0 + security-framework-sys@2.10.0 + security-framework@2.10.0 + serde@1.0.196 + serde@1.0.197 + serde_derive@1.0.196 + serde_derive@1.0.197 + serde_json@1.0.113 + serde_json@1.0.115 + smallvec@1.13.1 + smallvec@1.13.2 + spin@0.9.8 + spm_precompiled@0.1.4 + strsim@0.10.0 + strsim@0.11.0 + strsim@0.11.1 + subtle@2.5.0 + syn@1.0.109 + syn@2.0.48 + syn@2.0.58 + target-lexicon@0.12.13 + target-lexicon@0.12.14 + tempfile@3.10.0 + tempfile@3.10.1 + termcolor@1.4.1 + thiserror-impl@1.0.56 + thiserror-impl@1.0.58 + thiserror@1.0.56 + thiserror@1.0.58 + tinytemplate@1.2.1 + tinyvec@1.6.0 + tinyvec_macros@0.1.1 + unicode-bidi@0.3.15 + unicode-ident@1.0.12 + unicode-normalization-alignments@0.1.12 + unicode-normalization@0.1.23 + unicode-segmentation@1.11.0 + unicode-width@0.1.11 + unicode_categories@0.1.1 + unindent@0.2.3 + untrusted@0.9.0 + ureq@2.9.6 + url@2.5.0 + utf8parse@0.2.1 + vcpkg@0.2.15 + walkdir@2.5.0 + wasi@0.11.0+wasi-snapshot-preview1 + wasm-bindgen-backend@0.2.92 + wasm-bindgen-macro-support@0.2.92 + wasm-bindgen-macro@0.2.92 + wasm-bindgen-shared@0.2.92 + wasm-bindgen@0.2.92 + web-sys@0.3.69 + webpki-roots@0.26.1 + winapi-i686-pc-windows-gnu@0.4.0 + winapi-util@0.1.6 + winapi-x86_64-pc-windows-gnu@0.4.0 + winapi@0.3.9 + windows-sys@0.48.0 + windows-sys@0.52.0 + windows-targets@0.48.5 + windows-targets@0.52.0 + windows-targets@0.52.4 + windows_aarch64_gnullvm@0.48.5 + windows_aarch64_gnullvm@0.52.0 + windows_aarch64_gnullvm@0.52.4 + windows_aarch64_msvc@0.48.5 + windows_aarch64_msvc@0.52.0 + windows_aarch64_msvc@0.52.4 + windows_i686_gnu@0.48.5 + windows_i686_gnu@0.52.0 + windows_i686_gnu@0.52.4 + windows_i686_msvc@0.48.5 + windows_i686_msvc@0.52.0 + windows_i686_msvc@0.52.4 + windows_x86_64_gnu@0.48.5 + windows_x86_64_gnu@0.52.0 + windows_x86_64_gnu@0.52.4 + windows_x86_64_gnullvm@0.48.5 + windows_x86_64_gnullvm@0.52.0 + windows_x86_64_gnullvm@0.52.4 + windows_x86_64_msvc@0.48.5 + windows_x86_64_msvc@0.52.0 + windows_x86_64_msvc@0.52.4 + zeroize@1.7.0 +" + +inherit cargo distutils-r1 + +DESCRIPTION="Implementation of today's most used tokenizers" +HOMEPAGE="https://github.com/huggingface/tokenizers" +SRC_URI=" + https://github.com/huggingface/${PN}/archive/refs/tags/v${PV}.tar.gz + -> ${P}.gh.tar.gz + ${CARGO_CRATE_URIS} +" + +LICENSE="Apache-2.0" +# Dependent crate licenses +LICENSE+=" + Apache-2.0 Apache-2.0-with-LLVM-exceptions BSD-2 BSD ISC MIT MPL-2.0 + Unicode-DFS-2016 +" +SLOT="0" +KEYWORDS="~amd64" + +BDEPEND=" + test? ( sci-libs/datasets[${PYTHON_SINGLE_USEDEP}] ) + $(python_gen_cond_dep ' + dev-python/setuptools-rust[${PYTHON_USEDEP}] + ') +" + +distutils_enable_tests pytest + +QA_FLAGS_IGNORED="/usr/bin/cli .*/site-packages/tokenizers/.*so" + +src_unpack() { + cargo_src_unpack +} + +src_prepare() { + default + cd bindings/python + eapply "${FILESDIR}"/${P}-test.patch + distutils-r1_src_prepare +} + +src_configure() { + cd tokenizers + cargo_src_configure + cd ../bindings/python + distutils-r1_src_configure +} + +src_compile() { + cd tokenizers + cargo_src_compile + cd ../bindings/python + distutils-r1_src_compile +} + +src_test() { + cd tokenizers + # Tests do not work + #cargo_src_test + cd ../bindings/python + distutils-r1_src_test +} + +src_install() { + cd tokenizers + cargo_src_install + cd ../bindings/python + distutils-r1_src_install +} diff --git a/sci-libs/tokenizers/tokenizers-0.15.2.ebuild b/sci-libs/tokenizers/tokenizers-0.15.2.ebuild deleted file mode 100644 index d2da8b88ac9b..000000000000 --- a/sci-libs/tokenizers/tokenizers-0.15.2.ebuild +++ /dev/null @@ -1,339 +0,0 @@ -# Copyright 2023-2024 Gentoo Authors -# Distributed under the terms of the GNU General Public License v2 - -# Autogenerated by pycargoebuild 0.10 - -EAPI=8 - -DISTUTILS_USE_PEP517=maturin -PYTHON_COMPAT=( python3_{9..12} ) -DISTUTILS_EXT=1 - -CRATES=" - adler@1.0.2 - aho-corasick@1.1.2 - aho-corasick@1.1.3 - anes@0.1.6 - anstream@0.6.11 - anstream@0.6.13 - anstyle-parse@0.2.3 - anstyle-query@1.0.2 - anstyle-wincon@3.0.2 - anstyle@1.0.6 - assert_approx_eq@1.1.0 - autocfg@1.1.0 - autocfg@1.2.0 - base64@0.13.1 - base64@0.21.7 - bit-set@0.5.3 - bit-vec@0.6.3 - bitflags@1.3.2 - bitflags@2.4.2 - bitflags@2.5.0 - bumpalo@3.15.4 - cast@0.3.0 - cc@1.0.83 - cc@1.0.90 - cfg-if@1.0.0 - ciborium-io@0.2.2 - ciborium-ll@0.2.2 - ciborium@0.2.2 - clap@4.5.0 - clap@4.5.4 - clap_builder@4.5.0 - clap_builder@4.5.2 - clap_derive@4.5.0 - clap_derive@4.5.4 - clap_lex@0.7.0 - colorchoice@1.0.0 - console@0.15.8 - core-foundation-sys@0.8.6 - core-foundation@0.9.4 - crc32fast@1.4.0 - criterion-plot@0.5.0 - criterion@0.5.1 - crossbeam-deque@0.8.5 - crossbeam-epoch@0.9.18 - crossbeam-utils@0.8.19 - crunchy@0.2.2 - darling@0.14.4 - darling_core@0.14.4 - darling_macro@0.14.4 - derive_builder@0.12.0 - derive_builder_core@0.12.0 - derive_builder_macro@0.12.0 - dirs-sys@0.4.1 - dirs@5.0.1 - either@1.10.0 - encode_unicode@0.3.6 - env_logger@0.10.2 - errno@0.3.8 - esaxx-rs@0.1.10 - fancy-regex@0.13.0 - fastrand@2.0.1 - fastrand@2.0.2 - flate2@1.0.28 - fnv@1.0.7 - foreign-types-shared@0.1.1 - foreign-types@0.3.2 - form_urlencoded@1.2.1 - getrandom@0.2.12 - half@2.4.0 - heck@0.4.1 - heck@0.5.0 - hermit-abi@0.3.5 - hermit-abi@0.3.9 - hf-hub@0.3.2 - humantime@2.1.0 - ident_case@1.0.1 - idna@0.5.0 - indicatif@0.17.8 - indoc@2.0.4 - indoc@2.0.5 - instant@0.1.12 - is-terminal@0.4.12 - itertools@0.10.5 - itertools@0.11.0 - itertools@0.12.1 - itoa@1.0.10 - itoa@1.0.11 - js-sys@0.3.69 - lazy_static@1.4.0 - libc@0.2.153 - libredox@0.1.3 - linux-raw-sys@0.4.13 - lock_api@0.4.11 - log@0.4.20 - log@0.4.21 - macro_rules_attribute-proc_macro@0.2.0 - macro_rules_attribute@0.2.0 - matrixmultiply@0.3.8 - memchr@2.7.1 - memchr@2.7.2 - memoffset@0.9.0 - memoffset@0.9.1 - minimal-lexical@0.2.1 - miniz_oxide@0.7.2 - monostate-impl@0.1.11 - monostate@0.1.11 - native-tls@0.2.11 - ndarray@0.15.6 - nom@7.1.3 - num-complex@0.4.5 - num-integer@0.1.46 - num-traits@0.2.18 - number_prefix@0.4.0 - numpy@0.20.0 - once_cell@1.19.0 - onig@6.4.0 - onig_sys@69.8.1 - oorandom@11.1.3 - openssl-macros@0.1.1 - openssl-probe@0.1.5 - openssl-sys@0.9.102 - openssl@0.10.64 - option-ext@0.2.0 - parking_lot@0.12.1 - parking_lot_core@0.9.9 - paste@1.0.14 - percent-encoding@2.3.1 - pkg-config@0.3.29 - pkg-config@0.3.30 - plotters-backend@0.3.5 - plotters-svg@0.3.5 - plotters@0.3.5 - portable-atomic@1.6.0 - ppv-lite86@0.2.17 - proc-macro2@1.0.78 - proc-macro2@1.0.79 - pyo3-build-config@0.20.2 - pyo3-build-config@0.20.3 - pyo3-ffi@0.20.2 - pyo3-ffi@0.20.3 - pyo3-macros-backend@0.20.2 - pyo3-macros-backend@0.20.3 - pyo3-macros@0.20.2 - pyo3-macros@0.20.3 - pyo3@0.20.2 - pyo3@0.20.3 - quote@1.0.35 - rand@0.8.5 - rand_chacha@0.3.1 - rand_core@0.6.4 - rawpointer@0.2.1 - rayon-cond@0.3.0 - rayon-core@1.12.1 - rayon@1.8.1 - rayon@1.10.0 - redox_syscall@0.4.1 - regex-automata@0.4.5 - regex-syntax@0.8.2 - redox_users@0.4.5 - regex-automata@0.4.6 - regex-syntax@0.8.3 - regex@1.10.3 - regex@1.10.4 - ring@0.17.8 - rustc-hash@1.1.0 - rustix@0.38.31 - rustix@0.38.32 - rustls-pki-types@1.4.1 - rustls-webpki@0.102.2 - rustls@0.22.3 - ryu@1.0.16 - ryu@1.0.17 - scopeguard@1.2.0 - same-file@1.0.6 - schannel@0.1.23 - scopeguard@1.2.0 - security-framework-sys@2.10.0 - security-framework@2.10.0 - serde@1.0.196 - serde@1.0.197 - serde_derive@1.0.196 - serde_derive@1.0.197 - serde_json@1.0.113 - serde_json@1.0.115 - smallvec@1.13.1 - smallvec@1.13.2 - spin@0.9.8 - spm_precompiled@0.1.4 - strsim@0.10.0 - strsim@0.11.0 - strsim@0.11.1 - subtle@2.5.0 - syn@1.0.109 - syn@2.0.48 - syn@2.0.58 - target-lexicon@0.12.13 - target-lexicon@0.12.14 - tempfile@3.10.0 - tempfile@3.10.1 - termcolor@1.4.1 - thiserror-impl@1.0.56 - thiserror-impl@1.0.58 - thiserror@1.0.56 - thiserror@1.0.58 - tinytemplate@1.2.1 - tinyvec@1.6.0 - tinyvec_macros@0.1.1 - unicode-bidi@0.3.15 - unicode-ident@1.0.12 - unicode-normalization-alignments@0.1.12 - unicode-normalization@0.1.23 - unicode-segmentation@1.11.0 - unicode-width@0.1.11 - unicode_categories@0.1.1 - unindent@0.2.3 - untrusted@0.9.0 - ureq@2.9.6 - url@2.5.0 - utf8parse@0.2.1 - vcpkg@0.2.15 - walkdir@2.5.0 - wasi@0.11.0+wasi-snapshot-preview1 - wasm-bindgen-backend@0.2.92 - wasm-bindgen-macro-support@0.2.92 - wasm-bindgen-macro@0.2.92 - wasm-bindgen-shared@0.2.92 - wasm-bindgen@0.2.92 - web-sys@0.3.69 - webpki-roots@0.26.1 - winapi-i686-pc-windows-gnu@0.4.0 - winapi-util@0.1.6 - winapi-x86_64-pc-windows-gnu@0.4.0 - winapi@0.3.9 - windows-sys@0.48.0 - windows-sys@0.52.0 - windows-targets@0.48.5 - windows-targets@0.52.0 - windows-targets@0.52.4 - windows_aarch64_gnullvm@0.48.5 - windows_aarch64_gnullvm@0.52.0 - windows_aarch64_gnullvm@0.52.4 - windows_aarch64_msvc@0.48.5 - windows_aarch64_msvc@0.52.0 - windows_aarch64_msvc@0.52.4 - windows_i686_gnu@0.48.5 - windows_i686_gnu@0.52.0 - windows_i686_gnu@0.52.4 - windows_i686_msvc@0.48.5 - windows_i686_msvc@0.52.0 - windows_i686_msvc@0.52.4 - windows_x86_64_gnu@0.48.5 - windows_x86_64_gnu@0.52.0 - windows_x86_64_gnu@0.52.4 - windows_x86_64_gnullvm@0.48.5 - windows_x86_64_gnullvm@0.52.0 - windows_x86_64_gnullvm@0.52.4 - windows_x86_64_msvc@0.48.5 - windows_x86_64_msvc@0.52.0 - windows_x86_64_msvc@0.52.4 - zeroize@1.7.0 -" - -inherit cargo distutils-r1 - -DESCRIPTION="Implementation of today's most used tokenizers" -HOMEPAGE="https://github.com/huggingface/tokenizers" -SRC_URI=" - https://github.com/huggingface/${PN}/archive/refs/tags/v${PV}.tar.gz - -> ${P}.gh.tar.gz - ${CARGO_CRATE_URIS} -" - -LICENSE="Apache-2.0" -# Dependent crate licenses -LICENSE+=" - Apache-2.0 Apache-2.0-with-LLVM-exceptions BSD-2 BSD ISC MIT MPL-2.0 - Unicode-DFS-2016 -" -SLOT="0" -KEYWORDS="~amd64" -RESTRICT="test" - -BDEPEND="dev-python/setuptools-rust[${PYTHON_USEDEP}]" - -distutils_enable_tests pytest - -QA_FLAGS_IGNORED="/usr/bin/cli .*/site-packages/tokenizers/.*so" - -src_unpack() { - cargo_src_unpack -} - -src_prepare() { - default - cd bindings/python - distutils-r1_src_prepare -} - -src_configure() { - cd tokenizers - cargo_src_configure - cd ../bindings/python - distutils-r1_src_configure -} - -src_compile() { - cd tokenizers - cargo_src_compile - cd ../bindings/python - distutils-r1_src_compile -} - -src_test() { - cd tokenizers - # Tests do not work - #cargo_src_test - cd ../bindings/python - # Need dataset module - #distutils-r1_src_test -} - -src_install() { - cd tokenizers - cargo_src_install - cd ../bindings/python - distutils-r1_src_install -} -- cgit v1.2.3