diff options
author | V3n3RiX <venerix@redcorelinux.org> | 2017-10-09 18:53:29 +0100 |
---|---|---|
committer | V3n3RiX <venerix@redcorelinux.org> | 2017-10-09 18:53:29 +0100 |
commit | 4f2d7949f03e1c198bc888f2d05f421d35c57e21 (patch) | |
tree | ba5f07bf3f9d22d82e54a462313f5d244036c768 /sci-biology/cd-hit |
reinit the tree, so we can have metadata
Diffstat (limited to 'sci-biology/cd-hit')
-rw-r--r-- | sci-biology/cd-hit/Manifest | 7 | ||||
-rw-r--r-- | sci-biology/cd-hit/cd-hit-4.6.6.ebuild | 47 | ||||
-rw-r--r-- | sci-biology/cd-hit/files/cd-hit-4.6.6-fix-build-system.patch | 122 | ||||
-rw-r--r-- | sci-biology/cd-hit/files/cd-hit-4.6.6-fix-perl-shebangs.patch | 219 | ||||
-rw-r--r-- | sci-biology/cd-hit/metadata.xml | 27 |
5 files changed, 422 insertions, 0 deletions
diff --git a/sci-biology/cd-hit/Manifest b/sci-biology/cd-hit/Manifest new file mode 100644 index 000000000000..2f50b126b920 --- /dev/null +++ b/sci-biology/cd-hit/Manifest @@ -0,0 +1,7 @@ +AUX cd-hit-4.6.6-fix-build-system.patch 4006 SHA256 49944712f599aa023086f825bc56f9e4b3c629c2c28da6203ae3e379fab73aed SHA512 84aadb9967ad65d04127468edb161b8725a5b1468586320eb89d4b4f5f97ad7b60da9728fa1088ecf4aff179a6bda36675add9a255631cf9359c991b61451c6d WHIRLPOOL 674e127461403a449b7789009a25688b8a76fc63d5161cc7c51e1b0c08d39d510982aafa65ca81a5c75d12cab8ad4d5c6a8050e3c89d5e5274213c6cfb5ad3d9 +AUX cd-hit-4.6.6-fix-perl-shebangs.patch 4909 SHA256 94a650eda4f8beecf894eb216dea44ebcc507757cd143b59ced94e2cb779168f SHA512 3ded78d991ee922761a3a7395b4d422ff29c6fe1c0657b61ad74bc6f9253c7dc82414f8d71cfffae37cac192e96ab03f711257d3091cdd0c9444202bdf5967f5 WHIRLPOOL 95baf2dedea0f9f32c14631490b650279d1ea6afc4fede2196301add1c81beb753f89a15e99195c98e044fdd6b91ef8b4706008fb7b7c015775f0305aadf7009 +DIST cd-hit-4.6.6.tar.gz 1152570 SHA256 97946f8ae62c3efa20ad4c527b8ea22200cf1b75c9941fb14de2bdaf1d6910f1 SHA512 8241d6674fb041559792dbbb58c12b41302d2275d3bacb1362946094b48a0b8e1236e71b5dc77d13405220b60f8253e6f996753a8b051995a72c8353d4333c51 WHIRLPOOL 4ee404e34a9c22aef0b809568aa1a7cefcd8d08e4a8c97d1c65b7692ae14b0567e5cc446054083219b56e89895117f69f786e6183f6c67d03d1f8404f7ba9632 +EBUILD cd-hit-4.6.6.ebuild 997 SHA256 a3a6f4ae0e93e513905a55018ef1fc54006807ca42854c5464e21aab69fbd747 SHA512 fe17e01fc04944df9b86b98dc73a4be2f14300938e3e926f56c952eddad62caf846f0051a7c3a66dbca3132f3a79a8bc21d7a43206bd6d9d784b24c94c3204b8 WHIRLPOOL 2964aea497885cf086595fcd5f3e7df8ff9054332fb61555a82bb3b6fd7f54b8879c0d3323cb65900c92c63c3d85e092fe6efa2672fe6ce96573a0d84516abc5 +MISC ChangeLog 2808 SHA256 ae22b2153dfaa545d390a685df77d0cdc3309e922faaf0b20e61de7147fe3087 SHA512 afeee1fadfdb696838b35b2ad8427af91aec119a0d5c6db778cfb8041822b89d78bac8318b817df483f1d4e8663981d656af865e39f6b45d1cdca184272a6de5 WHIRLPOOL 9eeb9e48ec79dbe2c14b82d2409bbfdc1c8dbc40583a57b4bcb1203b407801a68a9a7de65e2283ebe8a82cbc08ace18c0682299058126a0b3103f9fc2bde4f2a +MISC ChangeLog-2015 869 SHA256 a3970c7e0acc384777e16dc34932f9d5bd9e51300123b1c20fd620be2e0ad02f SHA512 651b1a62dcbceec38ad3e542a3412cdee5d76e1b47cdfe8888b443ab3cf1f7f6d4e6251c101717a8de63847a674aac91e16e5d57ffff0b91615a6745e52bfec4 WHIRLPOOL d243585b6753aa0de8f5572f3307b8a8602dc67771425d8240886460013325572450acb20c7d6d1d1df0c9229ad6451a9e60778cba7580506a8998097d3fa9e0 +MISC metadata.xml 1351 SHA256 075c677ea54084f5b587e80741ffd32686d8bdded9dee8e7242d3dc33fbbdcbb SHA512 691c92d76f2bdb3ed31a802fb1003905942abfb7cef3c1d08848e69cc819082e82540217f7fc79ad39d11a2d43c45b845617bd8ad1282a7bde505ed8616b5364 WHIRLPOOL 9e0529fb4354c4e392b6985165e75d780bde0a3f3d9ce1cb40191ff752e771e62855328e24cf61746d44f690542bb1f833f5be690bb5cd1def64e5ac765371f6 diff --git a/sci-biology/cd-hit/cd-hit-4.6.6.ebuild b/sci-biology/cd-hit/cd-hit-4.6.6.ebuild new file mode 100644 index 000000000000..ebe345f15245 --- /dev/null +++ b/sci-biology/cd-hit/cd-hit-4.6.6.ebuild @@ -0,0 +1,47 @@ +# Copyright 1999-2017 Gentoo Foundation +# Distributed under the terms of the GNU General Public License v2 + +EAPI=6 + +inherit flag-o-matic toolchain-funcs + +RELDATE="2016-0711" +RELEASE="${PN}-v${PV}-${RELDATE}" + +DESCRIPTION="Clustering Database at High Identity with Tolerance" +HOMEPAGE="http://weizhong-lab.ucsd.edu/cd-hit/" +SRC_URI="https://github.com/weizhongli/cdhit/releases/download/V${PV}/${RELEASE}.tar.gz -> ${P}.tar.gz" + +LICENSE="GPL-2" +SLOT="0" +KEYWORDS="~amd64 ~x86 ~amd64-linux ~x86-linux" +IUSE="openmp" + +RDEPEND="dev-lang/perl" + +S="${WORKDIR}"/${RELEASE} + +PATCHES=( + "${FILESDIR}"/${PN}-4.6.6-fix-perl-shebangs.patch + "${FILESDIR}"/${PN}-4.6.6-fix-build-system.patch +) + +pkg_pretend() { + [[ ${MERGE_TYPE} != binary ]] && use openmp && tc-check-openmp +} + +pkg_setup() { + [[ ${MERGE_TYPE} != binary ]] && use openmp && tc-check-openmp +} + +src_compile() { + tc-export CXX + emake openmp=$(usex openmp) +} + +src_install() { + dodir /usr/bin + PREFIX="${EPREFIX}"/usr/bin default + + dodoc doc/*.pdf +} diff --git a/sci-biology/cd-hit/files/cd-hit-4.6.6-fix-build-system.patch b/sci-biology/cd-hit/files/cd-hit-4.6.6-fix-build-system.patch new file mode 100644 index 000000000000..c668d5c6154e --- /dev/null +++ b/sci-biology/cd-hit/files/cd-hit-4.6.6-fix-build-system.patch @@ -0,0 +1,122 @@ +Fix build system, in order to honour user variables + +--- a/makefile ++++ b/makefile +@@ -1,7 +1,4 @@ +- +-CC = g++ -Wall -ggdb +-CC = g++ -pg +-CC = g++ ++CXX ?= g++ + + # without OpenMP + +@@ -9,35 +6,19 @@ + # in command line: + # make openmp=yes + ifeq ($(openmp),no) +- CCFLAGS = -DNO_OPENMP +-else +- CCFLAGS = -fopenmp +-endif +- +-# support debugging +-# in command line: +-# make debug=yes +-# make openmp=yes debug=yes +-ifeq ($(debug),yes) +-CCFLAGS += -ggdb ++ my_CPPFLAGS = -DNO_OPENMP + else +-CCFLAGS += -O2 ++ my_CXXFLAGS = -fopenmp + endif + + ifdef MAX_SEQ +-CCFLAGS += -DMAX_SEQ=$(MAX_SEQ) ++my_CPPFLAGS += -DMAX_SEQ=$(MAX_SEQ) + endif + +-#LDFLAGS = -static -o +-LDFLAGS += -o +- + PROGS = cd-hit cd-hit-est cd-hit-2d cd-hit-est-2d cd-hit-div cd-hit-454 + +-# Propagate hardening flags +-CCFLAGS := $(CPPFLAGS) $(CCFLAGS) $(CXXFLAGS) +- + .c++.o: +- $(CC) $(CCFLAGS) -c $< ++ $(CXX) $(my_CXXFLAGS) $(CXXFLAGS) $(my_CPPFLAGS) $(CPPFLAGS) -c $< + + all: $(PROGS) + +@@ -47,52 +28,52 @@ + # programs + + cd-hit: cdhit-common.o cdhit-utility.o cdhit.o +- $(CC) $(CCFLAGS) cdhit.o cdhit-common.o cdhit-utility.o $(LDFLAGS) cd-hit ++ $(CXX) $(my_CXXFLAGS) $(CXXFLAGS) $(LDFLAGS) cdhit.o cdhit-common.o cdhit-utility.o -o cd-hit + + cd-hit-2d: cdhit-common.o cdhit-utility.o cdhit-2d.o +- $(CC) $(CCFLAGS) cdhit-2d.o cdhit-common.o cdhit-utility.o $(LDFLAGS) cd-hit-2d ++ $(CXX) $(my_CXXFLAGS) $(CXXFLAGS) $(LDFLAGS) cdhit-2d.o cdhit-common.o cdhit-utility.o -o cd-hit-2d + + cd-hit-est: cdhit-common.o cdhit-utility.o cdhit-est.o +- $(CC) $(CCFLAGS) cdhit-est.o cdhit-common.o cdhit-utility.o $(LDFLAGS) cd-hit-est ++ $(CXX) $(my_CXXFLAGS) $(CXXFLAGS) $(LDFLAGS) cdhit-est.o cdhit-common.o cdhit-utility.o -o cd-hit-est + + cd-hit-est-2d: cdhit-common.o cdhit-utility.o cdhit-est-2d.o +- $(CC) $(CCFLAGS) cdhit-est-2d.o cdhit-common.o cdhit-utility.o $(LDFLAGS) cd-hit-est-2d ++ $(CXX) $(my_CXXFLAGS) $(CXXFLAGS) $(LDFLAGS) cdhit-est-2d.o cdhit-common.o cdhit-utility.o -o cd-hit-est-2d + + cd-hit-div: cdhit-common.o cdhit-utility.o cdhit-div.o +- $(CC) $(CCFLAGS) cdhit-div.o cdhit-common.o cdhit-utility.o $(LDFLAGS) cd-hit-div ++ $(CXX) $(my_CXXFLAGS) $(CXXFLAGS) $(LDFLAGS) cdhit-div.o cdhit-common.o cdhit-utility.o -o cd-hit-div + + cd-hit-454: cdhit-common.o cdhit-utility.o cdhit-454.o +- $(CC) $(CCFLAGS) cdhit-454.o cdhit-common.o cdhit-utility.o $(LDFLAGS) cd-hit-454 ++ $(CXX) $(my_CXXFLAGS) $(CXXFLAGS) $(LDFLAGS) cdhit-454.o cdhit-common.o cdhit-utility.o -o cd-hit-454 + + # objects + cdhit-common.o: cdhit-common.c++ cdhit-common.h +- $(CC) $(CCFLAGS) cdhit-common.c++ -c ++ $(CXX) $(my_CXXFLAGS) $(CXXFLAGS) $(my_CPPFLAGS) $(CPPFLAGS) cdhit-common.c++ -c + + cdhit-utility.o: cdhit-utility.c++ cdhit-utility.h +- $(CC) $(CCFLAGS) cdhit-utility.c++ -c ++ $(CXX) $(my_CXXFLAGS) $(CXXFLAGS) $(my_CPPFLAGS) $(CPPFLAGS) cdhit-utility.c++ -c + + cdhit.o: cdhit.c++ cdhit-utility.h +- $(CC) $(CCFLAGS) cdhit.c++ -c ++ $(CXX) $(my_CXXFLAGS) $(CXXFLAGS) $(my_CPPFLAGS) $(CPPFLAGS) cdhit.c++ -c + + cdhit-2d.o: cdhit-2d.c++ cdhit-utility.h +- $(CC) $(CCFLAGS) cdhit-2d.c++ -c ++ $(CXX) $(my_CXXFLAGS) $(CXXFLAGS) $(my_CPPFLAGS) $(CPPFLAGS) cdhit-2d.c++ -c + + cdhit-est.o: cdhit-est.c++ cdhit-utility.h +- $(CC) $(CCFLAGS) cdhit-est.c++ -c ++ $(CXX) $(my_CXXFLAGS) $(CXXFLAGS) $(my_CPPFLAGS) $(CPPFLAGS) cdhit-est.c++ -c + + cdhit-est-2d.o: cdhit-est-2d.c++ cdhit-utility.h +- $(CC) $(CCFLAGS) cdhit-est-2d.c++ -c ++ $(CXX) $(my_CXXFLAGS) $(CXXFLAGS) $(my_CPPFLAGS) $(CPPFLAGS) cdhit-est-2d.c++ -c + + cdhit-div.o: cdhit-div.c++ cdhit-common.h +- $(CC) $(CCFLAGS) cdhit-div.c++ -c ++ $(CXX) $(my_CXXFLAGS) $(CXXFLAGS) $(my_CPPFLAGS) $(CPPFLAGS) cdhit-div.c++ -c + + cdhit-454.o: cdhit-454.c++ cdhit-common.h +- $(CC) $(CCFLAGS) cdhit-454.c++ -c ++ $(CXX) $(my_CXXFLAGS) $(CXXFLAGS) $(my_CPPFLAGS) $(CPPFLAGS) cdhit-454.c++ -c + + PREFIX ?= /usr/local/bin + + install: + for prog in $(PROGS); do \ +- install -m 0755 $$prog $(PREFIX); \ ++ install -m 0755 $$prog $(DESTDIR)$(PREFIX); \ + done +- install -m 0755 *.pl $(PREFIX); ++ install -m 0755 *.pl $(DESTDIR)$(PREFIX); diff --git a/sci-biology/cd-hit/files/cd-hit-4.6.6-fix-perl-shebangs.patch b/sci-biology/cd-hit/files/cd-hit-4.6.6-fix-perl-shebangs.patch new file mode 100644 index 000000000000..3784296f2e94 --- /dev/null +++ b/sci-biology/cd-hit/files/cd-hit-4.6.6-fix-perl-shebangs.patch @@ -0,0 +1,219 @@ +Make perl shebangs more Prefix friendly +See also: https://blogs.gentoo.org/mgorny/2016/02/08/a-quick-note-on-portable-shebangs/ + +--- a/cd-hit-2d-para.pl ++++ b/cd-hit-2d-para.pl +@@ -1,4 +1,4 @@ +-#!/usr/bin/perl -w ++#!/usr/bin/env perl + # ============================================================================= + # CD-HIT + # http://cd-hit.org/ +--- a/cd-hit-div.pl ++++ b/cd-hit-div.pl +@@ -1,4 +1,4 @@ +-#!/usr/bin/perl ++#!/usr/bin/env perl + + #not like cd-hit-div, this script do not sort input + #or throw away seq +--- a/cd-hit-para.pl ++++ b/cd-hit-para.pl +@@ -1,4 +1,4 @@ +-#!/usr/bin/perl -w ++#!/usr/bin/env perl + # ============================================================================= + # CD-HIT + # http://cd-hit.org/ +--- a/clstr2tree.pl ++++ b/clstr2tree.pl +@@ -1,4 +1,4 @@ +-#!/usr/bin/perl ++#!/usr/bin/env perl + + $clstr = shift; + $fr = shift; # for nr80.clstr $fr = 0.8 +--- a/clstr2txt.pl ++++ b/clstr2txt.pl +@@ -1,4 +1,4 @@ +-#!/usr/bin/perl ++#!/usr/bin/env perl + + my $no = 0; + my $clstr_no = ""; +--- a/clstr2xml.pl ++++ b/clstr2xml.pl +@@ -1,4 +1,4 @@ +-#!/usr/bin/perl ++#!/usr/bin/env perl + + #usage: clstr_xml.pl [-len|-size] level1.clstr [level2.clstr level3.clstr ...] + #purpose: to create xml file from cd-hit or hierarchical cd-hit(h-cd-hit) results +--- a/clstr_cut.pl ++++ b/clstr_cut.pl +@@ -1,4 +1,4 @@ +-#!/usr/bin/perl ++#!/usr/bin/env perl + + #keep only top $no proteins in cluster + +--- a/clstr_merge_noorder.pl ++++ b/clstr_merge_noorder.pl +@@ -1,4 +1,4 @@ +-#!/usr/bin/perl ++#!/usr/bin/env perl + + # order of clusters don't need to be the same + # but then I have to read everything into memory +--- a/clstr_merge.pl ++++ b/clstr_merge.pl +@@ -1,4 +1,4 @@ +-#!/usr/bin/perl ++#!/usr/bin/env perl + + # the order of clusters need to be identical + my ($master_clstr, @clstr) = @ARGV; +--- a/clstr_quality_eval_by_link.pl ++++ b/clstr_quality_eval_by_link.pl +@@ -1,4 +1,4 @@ +-#!/usr/bin/perl ++#!/usr/bin/env perl + + ## calculate the sensitivity and specificity of clusters + ## if the input fasta file has pre-defined classification term +--- a/clstr_quality_eval.pl ++++ b/clstr_quality_eval.pl +@@ -1,4 +1,4 @@ +-#!/usr/bin/perl ++#!/usr/bin/env perl + + ## calculate the sensitivity and specificity of clusters + ## if the input fasta file has pre-defined classification term +--- a/clstr_reduce.pl ++++ b/clstr_reduce.pl +@@ -1,4 +1,4 @@ +-#!/usr/bin/perl ++#!/usr/bin/env perl + + + $file90 = shift; +--- a/clstr_renumber.pl ++++ b/clstr_renumber.pl +@@ -1,4 +1,4 @@ +-#!/usr/bin/perl ++#!/usr/bin/env perl + $no = 0; + while($ll=<>){ + if ($ll =~ /^>Cluster (\d+)/) { +--- a/clstr_rep.pl ++++ b/clstr_rep.pl +@@ -1,4 +1,4 @@ +-#!/usr/bin/perl ++#!/usr/bin/env perl + + $rep = ""; + $no = 0; +--- a/clstr_reps_faa_rev.pl ++++ b/clstr_reps_faa_rev.pl +@@ -1,4 +1,4 @@ +-#!/usr/bin/perl ++#!/usr/bin/env perl + # output single fasta file + # for each cluster output at least $cutoff seqs + +--- a/clstr_rev.pl ++++ b/clstr_rev.pl +@@ -1,4 +1,4 @@ +-#!/usr/bin/perl ++#!/usr/bin/env perl + # if nr90 from nr100 and + # nr80 from nr90, so I have nr90.clstr and nr80.clstr + # but, in nr80.clstr, some gi numbers whose from nr100 are there +--- a/clstr_select.pl ++++ b/clstr_select.pl +@@ -1,4 +1,4 @@ +-#!/usr/bin/perl ++#!/usr/bin/env perl + + #my $by = shift; + my $min; +--- a/clstr_select_rep.pl ++++ b/clstr_select_rep.pl +@@ -1,4 +1,4 @@ +-#!/usr/bin/perl ++#!/usr/bin/env perl + + #my $by = shift; + my $min; +--- a/clstr_size_histogram.pl ++++ b/clstr_size_histogram.pl +@@ -1,4 +1,4 @@ +-#!/usr/bin/perl ++#!/usr/bin/env perl + + if(@ARGV==0){ + print "Usage:\n\tclstr_size_histogram.pl [-bin N] clstr_file\n"; +--- a/clstr_size_stat.pl ++++ b/clstr_size_stat.pl +@@ -1,4 +1,4 @@ +-#!/usr/bin/perl ++#!/usr/bin/env perl + + if(@ARGV==0){ + print "Usage:\n\tclstr_size_stat.pl clstr_file\n"; +--- a/clstr_sort_by.pl ++++ b/clstr_sort_by.pl +@@ -1,4 +1,4 @@ +-#!/usr/bin/perl ++#!/usr/bin/env perl + + my $sort_by_what = shift; + $sort_by_what = "no" unless $sort_by_what; +--- a/clstr_sort_prot_by.pl ++++ b/clstr_sort_prot_by.pl +@@ -1,4 +1,4 @@ +-#!/usr/bin/perl ++#!/usr/bin/env perl + + my $sort_by = shift; + $sort_by = "len" unless ($sort_by); +--- a/clstr_sql_tbl.pl ++++ b/clstr_sql_tbl.pl +@@ -1,4 +1,4 @@ +-#!/usr/bin/perl ++#!/usr/bin/env perl + + if(@ARGV==0){ + print "Usage:\n\tclstr_sql_tbl.pl clstr_file tbl_file\n"; +--- a/clstr_sql_tbl_sort.pl ++++ b/clstr_sql_tbl_sort.pl +@@ -1,4 +1,4 @@ +-#!/usr/bin/perl ++#!/usr/bin/env perl + + if(@ARGV==0){ + print "Usage:\n\tclstr_sql_tbl_sort.pl table_file level\n"; +--- a/make_multi_seq.pl ++++ b/make_multi_seq.pl +@@ -1,4 +1,4 @@ +-#!/usr/bin/perl ++#!/usr/bin/env perl + + #note you have to use "-d 0" in the cd-hit run + #note you better to use "-g 1" in the cd-hit run +--- a/plot_2d.pl ++++ b/plot_2d.pl +@@ -1,4 +1,4 @@ +-#!/usr/bin/perl ++#!/usr/bin/env perl + + use Image::Magick; + +--- a/plot_len1.pl ++++ b/plot_len1.pl +@@ -1,4 +1,4 @@ +-#!/usr/bin/perl ++#!/usr/bin/env perl + + $file90 = shift; + $segs = shift; diff --git a/sci-biology/cd-hit/metadata.xml b/sci-biology/cd-hit/metadata.xml new file mode 100644 index 000000000000..1f3b075216dd --- /dev/null +++ b/sci-biology/cd-hit/metadata.xml @@ -0,0 +1,27 @@ +<?xml version='1.0' encoding='UTF-8'?> +<!DOCTYPE pkgmetadata SYSTEM "http://www.gentoo.org/dtd/metadata.dtd"> +<pkgmetadata> + <maintainer type="project"> + <email>sci-biology@gentoo.org</email> + <name>Gentoo Biology Project</name> + </maintainer> + <longdescription> +CD-HIT is a very widely used program for clustering and comparing large sets +of protein or nucleotide sequences. CD-HIT is very fast and can handle +extremely large databases. CD-HIT helps to significantly reduce the +computational and manual efforts in many sequence analysis tasks and aids in +understanding the data structure and correct the bias within a dataset. +The CD-HIT package has CD-HIT, CD-HIT-2D, CD-HIT-EST, CD-HIT-EST-2D, +CD-HIT-454, CD-HIT-PARA, PSI-CD-HIT and over a dozen scripts. CD-HIT +(CD-HIT-EST) clusters similar proteins (DNAs) into clusters that meet a +user-defined similarity threshold. CD-HIT-2D (CD-HIT-EST-2D) compares 2 +datasets and identifies the sequences in db2 that are similar to db1 above +a threshold. CD-HIT-454 is a program to identify natural and artificial +duplicates from pyrosequencing reads. The usage of other programs and +scripts can be found in CD-HIT user's guide. + </longdescription> + <upstream> + <remote-id type="google-code">cdhit</remote-id> + <remote-id type="github">weizhongli/cdhit</remote-id> + </upstream> +</pkgmetadata> |