diff options
author | V3n3RiX <venerix@redcorelinux.org> | 2020-04-25 11:37:10 +0100 |
---|---|---|
committer | V3n3RiX <venerix@redcorelinux.org> | 2020-04-25 11:37:10 +0100 |
commit | 38423c67c8a23f6a1bc42038193182e2da3116eb (patch) | |
tree | 04e2cf4bd43601b77daa79fe654e409187093c5e /dev-python/nltk-data | |
parent | 623ee73d661e5ed8475cb264511f683407d87365 (diff) |
gentoo resync : 25.04.2020
Diffstat (limited to 'dev-python/nltk-data')
-rw-r--r-- | dev-python/nltk-data/Manifest | 1 | ||||
-rw-r--r-- | dev-python/nltk-data/nltk-data-20200312-r1.ebuild | 184 |
2 files changed, 185 insertions, 0 deletions
diff --git a/dev-python/nltk-data/Manifest b/dev-python/nltk-data/Manifest index c671c3327b17..147e0105722e 100644 --- a/dev-python/nltk-data/Manifest +++ b/dev-python/nltk-data/Manifest @@ -105,5 +105,6 @@ DIST nltk-wordnet-20200312.zip 10775600 BLAKE2B dac56a8fb1fa6882b1871c394ad2acb2 DIST nltk-wordnet_ic-20200312.zip 12056682 BLAKE2B c2dc2a646015b23699a72f636b588ec5718c70e6941d9d56863257e1e0396c8cf59ac1dc6ed74e5d7f0c2ee9129d63221a03967bf66a3d335e99160f295ed44d SHA512 1c94451a13af6c76bff60a0cab2e70402a3d9abd2e8fc62a5473f24ab4229feb0afe4faa8d389734697a6cf86d2c8b1dc700bb3afa3cbc279b75d7e0ec19fc6d DIST nltk-words-20200312.zip 757777 BLAKE2B eaaaaab6c26e206e9b6ce45daf779e3cc6706a06132afeabf013026d0009caee2d678f3c4ea9125b9654f7143bef29ec7a5706b79e5650ea556c6821b7754e6f SHA512 2810f05d3fc7ee6b6f8636fa1ff7b4e8c8cdac12b415cc54d15c69102290122ea138ec4fa36cb483f790c1ac10b0f83ae4c2c3e0e8df7e67e90e962ee5dbb0be DIST nltk-ycoe-20200312.zip 477 BLAKE2B 574835aa011a06a06363e26facd6a6f583a1dc1cac2de39adff59d8ab48eefac030b43d935a2f79af855259f2a9a571193dae2811589483af97406ff05c76c9e SHA512 e39ce165074d10ff63cb84ea52905d7ecb937797c8123ed113c5609afe1f63ac44d04d48a681002c4eac21dc9076ac74164b886c6f9ce42f3a102c38d1e8e756 +EBUILD nltk-data-20200312-r1.ebuild 3849 BLAKE2B dbceac6bb6c111f6953607ba9520ae42c08b809a594e76dbf05b472f0e392b6dbbf7c036cf207a2512079c9215d8db46f7f11e06302fe40997ac204baea4684c SHA512 7574fc9b41eeef51649a557be0f1e1ce06f6ac8163b3d18f7abee50fa0027760190be4c8349df4d51e450890b6d286f90e5d46bd8e53cef7a985ccc79bb4db47 EBUILD nltk-data-20200312.ebuild 3849 BLAKE2B a2c28c659c7f6451c20641adb5c2d9dfbfc54612ce7edf2a865170fd31bf1f823b7aa7fef272e2207e722ca60670f732c764b7475c2ce1ca530b9da1642a7405 SHA512 59a1e5bf2276d913f2800db5f1f8a71d45cc0d63ca701b39cdd5f3b5b9ce72b460a01d5db57ad346917fa11b180617449ce7cff3804222406859855106e4b6a3 MISC metadata.xml 389 BLAKE2B e9ecfd76b7c3cf4c8482ecbd809fa2dc94862b8315a3893afa7a57097000f4a048e990121e271d89655c77ad5a800669ce2b1a6fd5d01ad27ca6c3941c342e65 SHA512 1c9392dc21cf4cf203d368be88d5d7f3302570668a04caa2383038668542fb8c4e25c907aa8b86edd5e9e1f969c96aa1637f98e3538113c13e4e7c1084f4be5b diff --git a/dev-python/nltk-data/nltk-data-20200312-r1.ebuild b/dev-python/nltk-data/nltk-data-20200312-r1.ebuild new file mode 100644 index 000000000000..c8f12c580e60 --- /dev/null +++ b/dev-python/nltk-data/nltk-data-20200312-r1.ebuild @@ -0,0 +1,184 @@ +# Copyright 2020 Gentoo Authors +# Distributed under the terms of the GNU General Public License v2 + +EAPI=7 + +inherit check-reqs + +DESCRIPTION="Data files for NLTK" +HOMEPAGE="https://www.nltk.org/nltk_data/" + +# at least some of the files have poorly documented licenses +# TODO: create a USE flag for free-ish subset +LICENSE="all-rights-reserved" +SLOT="0" +KEYWORDS="~amd64 ~x86" +IUSE="extra" +RESTRICT="bindist mirror" + +BDEPEND="app-arch/unzip" + +PACKAGES_ZIP=( + # wget -O - https://www.nltk.org/nltk_data/ | xml sel -t -m '//package[@unzip=0]' -v @subdir -o "/" -v @id -n - | sort + corpora/comtrans + corpora/conll2007 + corpora/jeita + corpora/knbc + corpora/machado + corpora/masc_tagged + corpora/nombank.1.0 + corpora/panlex_swadesh + corpora/propbank + corpora/reuters + corpora/semcor + corpora/universal_treebanks_v20 + sentiment/vader_lexicon + stemmers/snowball_data +) + +PACKAGES_UNPACK=( + # wget -O - https://www.nltk.org/nltk_data/ | xml sel -t -m '//package[@unzip=1]' -v @subdir -o "/" -v @id -n - | sort + corpora/abc + corpora/alpino + corpora/brown + corpora/cess_cat + corpora/cess_esp + corpora/chat80 + corpora/city_database + corpora/cmudict + corpora/comparative_sentences + corpora/conll2000 + corpora/conll2002 + corpora/crubadan + corpora/dependency_treebank + corpora/dolch + corpora/europarl_raw + corpora/floresta + corpora/framenet_v15 + corpora/framenet_v17 + corpora/gazetteers + corpora/genesis + corpora/gutenberg + corpora/ieer + corpora/inaugural + corpora/indian + corpora/lin_thesaurus + corpora/mac_morpho + corpora/movie_reviews + corpora/mte_teip5 + corpora/names + corpora/nonbreaking_prefixes + corpora/nps_chat + corpora/omw + corpora/opinion_lexicon + corpora/pl196x + corpora/ppattach + corpora/product_reviews_1 + corpora/product_reviews_2 + corpora/pros_cons + corpora/ptb + corpora/qc + corpora/rte + corpora/senseval + corpora/sentence_polarity + corpora/sentiwordnet + corpora/shakespeare + corpora/sinica_treebank + corpora/state_union + corpora/stopwords + corpora/subjectivity + corpora/swadesh + corpora/switchboard + corpora/timit + corpora/toolbox + corpora/treebank + corpora/twitter_samples + corpora/udhr + corpora/udhr2 + corpora/verbnet + corpora/webtext + corpora/wordnet + corpora/wordnet_ic + corpora/words + grammars/book_grammars + grammars/large_grammars + grammars/sample_grammars + misc/perluniprops + models/bllip_wsj_no_aux + models/moses_sample + models/wmt15_eval + models/word2vec_sample + stemmers/porter_test + stemmers/rslp + taggers/averaged_perceptron_tagger + taggers/averaged_perceptron_tagger_ru + taggers/universal_tagset + tokenizers/punkt +) + +PACKAGES_UNPACK_EXTRA=( + chunkers/maxent_ne_chunker + corpora/biocreative_ppi + corpora/brown_tei + corpora/kimmo + corpora/paradigms + corpora/pe08 + corpora/pil + corpora/problem_reports + corpora/smultron + corpora/unicode_samples + corpora/verbnet3 + corpora/ycoe + grammars/basque_grammars + grammars/spanish_grammars + help/tagsets + misc/mwa_ppdb + taggers/maxent_treebank_pos_tagger +) + +add_data() { + local x + for x; do + SRC_URI+=" + https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/${x}.zip + -> nltk-${x#*/}-${PV}.zip" + done +} + +add_data "${PACKAGES_ZIP[@]}" "${PACKAGES_UNPACK[@]}" +SRC_URI+=" + extra? (" +add_data "${PACKAGES_UNPACK_EXTRA[@]}" +SRC_URI+=" + )" + +CHECKREQS_DISK_USR=3G +CHECKREQS_DISK_BUILD=${CHECKREQS_DISK_USR} + +src_unpack() { + local x + local to_unpack=( "${PACKAGES_UNPACK[@]}" ) + use extra && to_unpack+=( "${PACKAGES_UNPACK_EXTRA[@]}" ) + for x in "${to_unpack[@]}"; do + local cat=${x%/*} + local pkg=${x#*/} + + mkdir -p "${S}/${cat}" || die + cd "${S}/${cat}" || die + unpack "nltk-${pkg}-${PV}.zip" + done +} + +src_install() { + dodir /usr/share/nltk_data + mv * "${ED}/usr/share/nltk_data/" || die + + local x + for x in "${PACKAGES_ZIP[@]}"; do + local cat=${x%/*} + local pkg=${x#*/} + + insinto "/usr/share/nltk_data/${cat}" + newins "${DISTDIR}/nltk-${pkg}-${PV}.zip" "${pkg}.zip" + done +} |