summary | refs | log | tree | commit | diff
path: root/dev-python/nltk-data
diff options
context:
space:
mode:
author: V3n3RiX <venerix@koprulu.sector> 2021-10-26 00:10:07 +0100
committer: V3n3RiX <venerix@koprulu.sector> 2021-10-26 00:10:07 +0100
commit: 95461df035e3867364495f065e5e805bf629b2d7 (patch)
tree: 867dce371a84a696e91be255d89f282975aa0480 /dev-python/nltk-data
parent: 46eedbedafdb0040c37884982d4c775ce277fb7b (diff)
gentoo resync : 25.10.2021
Diffstat (limited to 'dev-python/nltk-data')
-rw-r--r-- dev-python/nltk-data/Manifest | 5
-rw-r--r-- dev-python/nltk-data/nltk-data-20211023.ebuild (renamed from dev-python/nltk-data/nltk-data-20200312-r1.ebuild) | 61
2 files changed, 43 insertions, 23 deletions
diff --git a/dev-python/nltk-data/Manifest b/dev-python/nltk-data/Manifest
index 71dc57936690..c8d69b0aadf5 100644
--- a/dev-python/nltk-data/Manifest
+++ b/dev-python/nltk-data/Manifest
@@ -81,7 +81,7 @@ DIST nltk-smultron-20200312.zip 166207 BLAKE2B d0c3e75dd108965e260d913e0c02137da
DIST nltk-snowball_data-20200312.zip 6785405 BLAKE2B 44c10439b142540ac7eece967efa1431fd8f45342f0a90875dacf29ad374fe4c7d30af11d42ba45e0f1ec1836d56b2ff684ee352c5e8536cfb5db5eb7632285b SHA512 6c8a9259d88f6f7f499867d83b731de99d7fa4e8827ecedf836f653fc1a810efa9f6c5c6e2720a9e6610bc00978956b6a119bd08b70e3e241c4e9faccddd81d8
DIST nltk-spanish_grammars-20200312.zip 4047 BLAKE2B d8a8dbb558850a6a60f1fe5ab0f617f3a0f3c64bc7d49980cf793d374c6679d1bd42afb7e61776737b5eec162f2520abf2ee3acc92ea9ee0f397c3089b3b5b28 SHA512 4513347156e9351c259c0e2448198d68354bbc95e0a54561c31a88f13f333ebcba3e294c820cb62036665f2904eb6a7137546cd580e361c0423c30a8aed950b2
DIST nltk-state_union-20200312.zip 808757 BLAKE2B 2d352af0ced736d3c11a821eaba0b035b3b5b6b0f20db3bd5d4ac2451f99daf68bffd3ad984bd404ecc4a1e67ca9281c529af2cc9e295a3a7330f36ee9640bc8 SHA512 a6fc83e6230e57ba66a7af62f0d2a5d44a14530ed1e0e914590b3f8b5bf939967c126a5e6f6899ba9134843893f65212e836d311109393c1200191a5c3163485
-DIST nltk-stopwords-20200312.zip 23047 BLAKE2B a0677cc0d4a3d54ee6e27eab8fb7635d6cd29265204896870e57457a54459f1d6cabc0c4e76e749397f5eee299eed0b524fcbc2033ea17d81cdb6cd98c5ed968 SHA512 31774fd3db2e0fba0209db71c08f9b2d971311ea4a59739cfdc0a9ae34f6c6c593f2a991a14ee20b0de8b380215e609f8398ed50c546775322ab8c4c3f8d06dd
+DIST nltk-stopwords-20211023.zip 26220 BLAKE2B 50219508c5fb24c1392064ea0546ca9060829f51689c0d626bb99e3fa8f712df98ed475fc0d27d99f934fb156ce65df91a8e7c22e1b4f16833339fb0aea34a9e SHA512 8308623953560281288b64e695638ca3fa28e1b6201b538d01650d6dfd08e821687217c8d012e93adfa2a48afebdda11af1bb86d638358c2931f36754d1e15ef
DIST nltk-subjectivity-20200312.zip 521628 BLAKE2B 0a8777a5b91b1b825fbde41cc927d496480129f0a810349bddde2036ed16f37611f2bc3b007e74fe36523612529a92433d32d094be72d247f5faef8220c3c491 SHA512 a3cc4d2d20f26c5eabd86fbced2c013e69d46e607013278eb35831a62e57523a17aec1b580ba62c7a867e61a561e1b222d8430f0c1e2d429a9479e12b008c5b7
DIST nltk-swadesh-20200312.zip 22828 BLAKE2B 1cd9d5355c6b53694ec545ca001b0807b4912a7878ba075b0f81ce8b9b22a5c7a18cf52cf2449483a1c89cd20d8d86986dc3d827fa93a7ef5824fddbc0922025 SHA512 90cb32532a5378d05ce34b84b5f8363dbb32f24afac58b0dcc5cdccba98fd7d37def7f4fbe76b11c8c64059bd19df745562bfbf5f4c721d65ce9f4be1348ab76
DIST nltk-switchboard-20200312.zip 791161 BLAKE2B 211116a751ae246fa31b6aca96b396d3642d89ad112588a09f8d91a5b76dc41c7fb4d36c16c6358cd8e0da8056bc83598ed0dc635cf7b1fd8469a0e80b5f1761 SHA512 690e5392dc082c4ac550bde2848aa65117e7a25cbc4bce0887581c531d03be64e21f044ac0a3286648255f0edd7766b1161f5575ad5fd680c7303b34c3226b8f
@@ -102,8 +102,9 @@ DIST nltk-webtext-20200312.zip 646297 BLAKE2B ca072fc38c144b659c76c36c9161641c91
DIST nltk-wmt15_eval-20200312.zip 383096 BLAKE2B 119943db4240171077569b3302c678644c2c9547ba67bfd055751059e0a3ad3ab6a19e4eedf9108d313d46dcd36cf19e11d973981da8c70a01c4cb790a7bd739 SHA512 362395d4c77ebe92f4c19fa8c2000082dc7a2343acc19cccb596ca00db6c40c231b904d807f46e2691cc4c4a0c79d14873b2a1983a494f2ca1485d540d787ceb
DIST nltk-word2vec_sample-20200312.zip 49396025 BLAKE2B 0512b9bb7121a528190079f578e82b4e8f8021bfc0062cfa5613d260f3eee17460aecfeaacb65d950e79d27a653c78633a88c3638ec16377e2dbc3006387ebaa SHA512 09c30a4ab8f9fb6a5b36974b5953260d01cb4f285827fb90a374d054ad775ac978602ab56c452f46f4f8601312e232fd739d2f54dafa44ab8b7b01831cf0d9d1
DIST nltk-wordnet-20200312.zip 10775600 BLAKE2B dac56a8fb1fa6882b1871c394ad2acb2d3be739c424570e27c89fb6983df5f896a8f359092ba82752ddfc0531d83563a219e85f80124202f29bda93181efe4dc SHA512 1923a8bcd56fa0b9a9de91f53070dce28c3a7efbab11d2ef55c87134b1bf30de0f40abab59c39eb15dce54aec9491d8a5a259de212ff4cb25cde0ad09317009a
+DIST nltk-wordnet31-20211023.zip 11055271 BLAKE2B e41a1951af5a71c9506d1e948b860574c94ab0ef31c1789a7e7bfb29c6dccea5b1d8895007631f7b595e9f90306365b5042e7a80dc6e1364fdbf4a5f0cba3b28 SHA512 a86091bd55e3a706892550b232be8f5199092623f1f8305d8c9be967a8527fe7d4ecb6250c369b229fdf52b6f3008106b758adc355fa2ad08b5b0cf2a458c173
DIST nltk-wordnet_ic-20200312.zip 12056682 BLAKE2B c2dc2a646015b23699a72f636b588ec5718c70e6941d9d56863257e1e0396c8cf59ac1dc6ed74e5d7f0c2ee9129d63221a03967bf66a3d335e99160f295ed44d SHA512 1c94451a13af6c76bff60a0cab2e70402a3d9abd2e8fc62a5473f24ab4229feb0afe4faa8d389734697a6cf86d2c8b1dc700bb3afa3cbc279b75d7e0ec19fc6d
DIST nltk-words-20200312.zip 757777 BLAKE2B eaaaaab6c26e206e9b6ce45daf779e3cc6706a06132afeabf013026d0009caee2d678f3c4ea9125b9654f7143bef29ec7a5706b79e5650ea556c6821b7754e6f SHA512 2810f05d3fc7ee6b6f8636fa1ff7b4e8c8cdac12b415cc54d15c69102290122ea138ec4fa36cb483f790c1ac10b0f83ae4c2c3e0e8df7e67e90e962ee5dbb0be
DIST nltk-ycoe-20200312.zip 477 BLAKE2B 574835aa011a06a06363e26facd6a6f583a1dc1cac2de39adff59d8ab48eefac030b43d935a2f79af855259f2a9a571193dae2811589483af97406ff05c76c9e SHA512 e39ce165074d10ff63cb84ea52905d7ecb937797c8123ed113c5609afe1f63ac44d04d48a681002c4eac21dc9076ac74164b886c6f9ce42f3a102c38d1e8e756
-EBUILD nltk-data-20200312-r1.ebuild 3847 BLAKE2B b032e0b5626119d37dc76c1f99b84dc064f22179a5db115ed98c379d6ca2c2b7e4b9d39fdd50d98f975f3e570aed44b9297ef3c749424c048d9d5318296bf3f9 SHA512 f338140ebba82e3aabe1b2bcdee2c0dc41b4ebe3d22bf828914d979af505707aa3c09ca0563369ed305c48ad4b5dbdc4830ea5059babce53d6b38eafeac23a14
+EBUILD nltk-data-20211023.ebuild 4181 BLAKE2B 82e9f7818fc387f69b582ed62db6fa0679b25471ec579b442022f392222d15aa1bd7e9d9b4135cd50a9288c3116e2f26244d25ae3d22e635c66f32b6b259b606 SHA512 43b5bc6eca64fe86cfd3ca49cbd7db7a0421d061847fc4615c8767bd6ff1ee4939f8608370ad8fd8fa914f38ce68041ea2be1f133234ec9bd6cbfa1ee1fcc525
MISC metadata.xml 390 BLAKE2B e8f39395d8770de8e0e4c13fd51641c4f4a33935dc9e266a899d50cf9f42780e7682177a81f8902b20255114696d790e1c8aae8fdacd25afe8e6057d68d1b554 SHA512 6f173c8a058d6ae48c8316e00bfcd94e7f297667f729ebdab16733e6ae60ca43918e3e5f992faec07ab53c9682293f4543dbaf06045dfd6ff76cbedc8271afdd
diff --git a/dev-python/nltk-data/nltk-data-20200312-r1.ebuild b/dev-python/nltk-data/nltk-data-20211023.ebuild
index 4a3d58c5db8a..df8437c785be 100644
--- a/dev-python/nltk-data/nltk-data-20200312-r1.ebuild
+++ b/dev-python/nltk-data/nltk-data-20211023.ebuild
@@ -1,4 +1,4 @@
-# Copyright 2020 Gentoo Authors
+# Copyright 2020-2021 Gentoo Authors
# Distributed under the terms of the GNU General Public License v2
EAPI=7
@@ -18,7 +18,7 @@ RESTRICT="bindist mirror"
BDEPEND="app-arch/unzip"
-PACKAGES_ZIP=(
+PACKAGES_ZIP_2020=(
# wget -O - https://www.nltk.org/nltk_data/ | xml sel -t -m '//package[@unzip=0]' -v @subdir -o "/" -v @id -n - | sort
corpora/comtrans
corpora/conll2007
@@ -36,7 +36,7 @@ PACKAGES_ZIP=(
stemmers/snowball_data
)
-PACKAGES_UNPACK=(
+PACKAGES_UNPACK_2020=(
# wget -O - https://www.nltk.org/nltk_data/ | xml sel -t -m '//package[@unzip=1]' -v @subdir -o "/" -v @id -n - | sort
corpora/abc
corpora/alpino
@@ -85,7 +85,6 @@ PACKAGES_UNPACK=(
corpora/shakespeare
corpora/sinica_treebank
corpora/state_union
- corpora/stopwords
corpora/subjectivity
corpora/swadesh
corpora/switchboard
@@ -116,7 +115,12 @@ PACKAGES_UNPACK=(
tokenizers/punkt
)
-PACKAGES_UNPACK_EXTRA=(
+PACKAGES_UNPACK_2021=(
+ corpora/stopwords
+ corpora/wordnet31
+)
+
+PACKAGES_UNPACK_EXTRA_2020=(
chunkers/maxent_ne_chunker
corpora/biocreative_ppi
corpora/brown_tei
@@ -137,48 +141,63 @@ PACKAGES_UNPACK_EXTRA=(
)
add_data() {
- local x
+ local x version=${1}
+ shift
+
for x; do
SRC_URI+="
https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/${x}.zip
- -> nltk-${x#*/}-${PV}.zip"
+ -> nltk-${x#*/}-${version}.zip"
done
}
-add_data "${PACKAGES_ZIP[@]}" "${PACKAGES_UNPACK[@]}"
+add_data 20200312 "${PACKAGES_ZIP_2020[@]}" "${PACKAGES_UNPACK_2020[@]}"
+add_data 20211023 "${PACKAGES_UNPACK_2021[@]}"
SRC_URI+="
extra? ("
-add_data "${PACKAGES_UNPACK_EXTRA[@]}"
+add_data 20200312 "${PACKAGES_UNPACK_EXTRA_2020[@]}"
SRC_URI+="
)"
CHECKREQS_DISK_USR=3G
CHECKREQS_DISK_BUILD=${CHECKREQS_DISK_USR}
-src_unpack() {
- local x
- local to_unpack=( "${PACKAGES_UNPACK[@]}" )
- use extra && to_unpack+=( "${PACKAGES_UNPACK_EXTRA[@]}" )
- for x in "${to_unpack[@]}"; do
+unpack_data() {
+ local x version=${1}
+ shift
+
+ for x; do
local cat=${x%/*}
local pkg=${x#*/}
mkdir -p "${S}/${cat}" || die
cd "${S}/${cat}" || die
- unpack "nltk-${pkg}-${PV}.zip"
+ unpack "nltk-${pkg}-${version}.zip"
done
}
-src_install() {
- dodir /usr/share/nltk_data
- mv * "${ED}/usr/share/nltk_data/" || die
+src_unpack() {
+ unpack_data 20200312 "${PACKAGES_UNPACK_2020[@]}"
+ unpack_data 20211023 "${PACKAGES_UNPACK_2021[@]}"
+ use extra && unpack_data 20200312 "${PACKAGES_UNPACK_EXTRA_2020[@]}"
+}
+
+install_zips() {
+ local x version=${1}
+ shift
- local x
- for x in "${PACKAGES_ZIP[@]}"; do
+ for x; do
local cat=${x%/*}
local pkg=${x#*/}
insinto "/usr/share/nltk_data/${cat}"
- newins "${DISTDIR}/nltk-${pkg}-${PV}.zip" "${pkg}.zip"
+ newins "${DISTDIR}/nltk-${pkg}-${version}.zip" "${pkg}.zip"
done
}
+
+src_install() {
+ dodir /usr/share/nltk_data
+ mv * "${ED}/usr/share/nltk_data/" || die
+
+ install_zips 20200312 "${PACKAGES_ZIP_2020[@]}"
+}