summary | refs | log | tree | commit | diff
path: root/sci-biology/cd-hit
diff options
context:
space:
mode:
author V3n3RiX <venerix@redcorelinux.org> 2017-10-09 18:53:29 +0100
committer V3n3RiX <venerix@redcorelinux.org> 2017-10-09 18:53:29 +0100
commit 4f2d7949f03e1c198bc888f2d05f421d35c57e21 (patch)
tree ba5f07bf3f9d22d82e54a462313f5d244036c768 /sci-biology/cd-hit
reinit the tree, so we can have metadata
Diffstat (limited to 'sci-biology/cd-hit')
-rw-r--r-- sci-biology/cd-hit/Manifest 7
-rw-r--r-- sci-biology/cd-hit/cd-hit-4.6.6.ebuild 47
-rw-r--r-- sci-biology/cd-hit/files/cd-hit-4.6.6-fix-build-system.patch 122
-rw-r--r-- sci-biology/cd-hit/files/cd-hit-4.6.6-fix-perl-shebangs.patch 219
-rw-r--r-- sci-biology/cd-hit/metadata.xml 27
5 files changed, 422 insertions, 0 deletions
diff --git a/sci-biology/cd-hit/Manifest b/sci-biology/cd-hit/Manifest
new file mode 100644
index 000000000000..2f50b126b920
--- /dev/null
+++ b/sci-biology/cd-hit/Manifest
@@ -0,0 +1,7 @@
+AUX cd-hit-4.6.6-fix-build-system.patch 4006 SHA256 49944712f599aa023086f825bc56f9e4b3c629c2c28da6203ae3e379fab73aed SHA512 84aadb9967ad65d04127468edb161b8725a5b1468586320eb89d4b4f5f97ad7b60da9728fa1088ecf4aff179a6bda36675add9a255631cf9359c991b61451c6d WHIRLPOOL 674e127461403a449b7789009a25688b8a76fc63d5161cc7c51e1b0c08d39d510982aafa65ca81a5c75d12cab8ad4d5c6a8050e3c89d5e5274213c6cfb5ad3d9
+AUX cd-hit-4.6.6-fix-perl-shebangs.patch 4909 SHA256 94a650eda4f8beecf894eb216dea44ebcc507757cd143b59ced94e2cb779168f SHA512 3ded78d991ee922761a3a7395b4d422ff29c6fe1c0657b61ad74bc6f9253c7dc82414f8d71cfffae37cac192e96ab03f711257d3091cdd0c9444202bdf5967f5 WHIRLPOOL 95baf2dedea0f9f32c14631490b650279d1ea6afc4fede2196301add1c81beb753f89a15e99195c98e044fdd6b91ef8b4706008fb7b7c015775f0305aadf7009
+DIST cd-hit-4.6.6.tar.gz 1152570 SHA256 97946f8ae62c3efa20ad4c527b8ea22200cf1b75c9941fb14de2bdaf1d6910f1 SHA512 8241d6674fb041559792dbbb58c12b41302d2275d3bacb1362946094b48a0b8e1236e71b5dc77d13405220b60f8253e6f996753a8b051995a72c8353d4333c51 WHIRLPOOL 4ee404e34a9c22aef0b809568aa1a7cefcd8d08e4a8c97d1c65b7692ae14b0567e5cc446054083219b56e89895117f69f786e6183f6c67d03d1f8404f7ba9632
+EBUILD cd-hit-4.6.6.ebuild 997 SHA256 a3a6f4ae0e93e513905a55018ef1fc54006807ca42854c5464e21aab69fbd747 SHA512 fe17e01fc04944df9b86b98dc73a4be2f14300938e3e926f56c952eddad62caf846f0051a7c3a66dbca3132f3a79a8bc21d7a43206bd6d9d784b24c94c3204b8 WHIRLPOOL 2964aea497885cf086595fcd5f3e7df8ff9054332fb61555a82bb3b6fd7f54b8879c0d3323cb65900c92c63c3d85e092fe6efa2672fe6ce96573a0d84516abc5
+MISC ChangeLog 2808 SHA256 ae22b2153dfaa545d390a685df77d0cdc3309e922faaf0b20e61de7147fe3087 SHA512 afeee1fadfdb696838b35b2ad8427af91aec119a0d5c6db778cfb8041822b89d78bac8318b817df483f1d4e8663981d656af865e39f6b45d1cdca184272a6de5 WHIRLPOOL 9eeb9e48ec79dbe2c14b82d2409bbfdc1c8dbc40583a57b4bcb1203b407801a68a9a7de65e2283ebe8a82cbc08ace18c0682299058126a0b3103f9fc2bde4f2a
+MISC ChangeLog-2015 869 SHA256 a3970c7e0acc384777e16dc34932f9d5bd9e51300123b1c20fd620be2e0ad02f SHA512 651b1a62dcbceec38ad3e542a3412cdee5d76e1b47cdfe8888b443ab3cf1f7f6d4e6251c101717a8de63847a674aac91e16e5d57ffff0b91615a6745e52bfec4 WHIRLPOOL d243585b6753aa0de8f5572f3307b8a8602dc67771425d8240886460013325572450acb20c7d6d1d1df0c9229ad6451a9e60778cba7580506a8998097d3fa9e0
+MISC metadata.xml 1351 SHA256 075c677ea54084f5b587e80741ffd32686d8bdded9dee8e7242d3dc33fbbdcbb SHA512 691c92d76f2bdb3ed31a802fb1003905942abfb7cef3c1d08848e69cc819082e82540217f7fc79ad39d11a2d43c45b845617bd8ad1282a7bde505ed8616b5364 WHIRLPOOL 9e0529fb4354c4e392b6985165e75d780bde0a3f3d9ce1cb40191ff752e771e62855328e24cf61746d44f690542bb1f833f5be690bb5cd1def64e5ac765371f6
diff --git a/sci-biology/cd-hit/cd-hit-4.6.6.ebuild b/sci-biology/cd-hit/cd-hit-4.6.6.ebuild
new file mode 100644
index 000000000000..ebe345f15245
--- /dev/null
+++ b/sci-biology/cd-hit/cd-hit-4.6.6.ebuild
@@ -0,0 +1,47 @@
+# Copyright 1999-2017 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+
+EAPI=6
+
+inherit flag-o-matic toolchain-funcs
+
+RELDATE="2016-0711"
+RELEASE="${PN}-v${PV}-${RELDATE}"
+
+DESCRIPTION="Clustering Database at High Identity with Tolerance"
+HOMEPAGE="http://weizhong-lab.ucsd.edu/cd-hit/"
+SRC_URI="https://github.com/weizhongli/cdhit/releases/download/V${PV}/${RELEASE}.tar.gz -> ${P}.tar.gz"
+
+LICENSE="GPL-2"
+SLOT="0"
+KEYWORDS="~amd64 ~x86 ~amd64-linux ~x86-linux"
+IUSE="openmp"
+
+RDEPEND="dev-lang/perl"
+
+S="${WORKDIR}"/${RELEASE}
+
+PATCHES=(
+ "${FILESDIR}"/${PN}-4.6.6-fix-perl-shebangs.patch
+ "${FILESDIR}"/${PN}-4.6.6-fix-build-system.patch
+)
+
+pkg_pretend() {
+ [[ ${MERGE_TYPE} != binary ]] && use openmp && tc-check-openmp
+}
+
+pkg_setup() {
+ [[ ${MERGE_TYPE} != binary ]] && use openmp && tc-check-openmp
+}
+
+src_compile() {
+ tc-export CXX
+ emake openmp=$(usex openmp)
+}
+
+src_install() {
+ dodir /usr/bin
+ PREFIX="${EPREFIX}"/usr/bin default
+
+ dodoc doc/*.pdf
+}
diff --git a/sci-biology/cd-hit/files/cd-hit-4.6.6-fix-build-system.patch b/sci-biology/cd-hit/files/cd-hit-4.6.6-fix-build-system.patch
new file mode 100644
index 000000000000..c668d5c6154e
--- /dev/null
+++ b/sci-biology/cd-hit/files/cd-hit-4.6.6-fix-build-system.patch
@@ -0,0 +1,122 @@
+Fix build system, in order to honour user variables
+
+--- a/makefile
++++ b/makefile
+@@ -1,7 +1,4 @@
+-
+-CC = g++ -Wall -ggdb
+-CC = g++ -pg
+-CC = g++
++CXX ?= g++
+
+ # without OpenMP
+
+@@ -9,35 +6,19 @@
+ # in command line:
+ # make openmp=yes
+ ifeq ($(openmp),no)
+- CCFLAGS = -DNO_OPENMP
+-else
+- CCFLAGS = -fopenmp
+-endif
+-
+-# support debugging
+-# in command line:
+-# make debug=yes
+-# make openmp=yes debug=yes
+-ifeq ($(debug),yes)
+-CCFLAGS += -ggdb
++ my_CPPFLAGS = -DNO_OPENMP
+ else
+-CCFLAGS += -O2
++ my_CXXFLAGS = -fopenmp
+ endif
+
+ ifdef MAX_SEQ
+-CCFLAGS += -DMAX_SEQ=$(MAX_SEQ)
++my_CPPFLAGS += -DMAX_SEQ=$(MAX_SEQ)
+ endif
+
+-#LDFLAGS = -static -o
+-LDFLAGS += -o
+-
+ PROGS = cd-hit cd-hit-est cd-hit-2d cd-hit-est-2d cd-hit-div cd-hit-454
+
+-# Propagate hardening flags
+-CCFLAGS := $(CPPFLAGS) $(CCFLAGS) $(CXXFLAGS)
+-
+ .c++.o:
+- $(CC) $(CCFLAGS) -c $<
++ $(CXX) $(my_CXXFLAGS) $(CXXFLAGS) $(my_CPPFLAGS) $(CPPFLAGS) -c $<
+
+ all: $(PROGS)
+
+@@ -47,52 +28,52 @@
+ # programs
+
+ cd-hit: cdhit-common.o cdhit-utility.o cdhit.o
+- $(CC) $(CCFLAGS) cdhit.o cdhit-common.o cdhit-utility.o $(LDFLAGS) cd-hit
++ $(CXX) $(my_CXXFLAGS) $(CXXFLAGS) $(LDFLAGS) cdhit.o cdhit-common.o cdhit-utility.o -o cd-hit
+
+ cd-hit-2d: cdhit-common.o cdhit-utility.o cdhit-2d.o
+- $(CC) $(CCFLAGS) cdhit-2d.o cdhit-common.o cdhit-utility.o $(LDFLAGS) cd-hit-2d
++ $(CXX) $(my_CXXFLAGS) $(CXXFLAGS) $(LDFLAGS) cdhit-2d.o cdhit-common.o cdhit-utility.o -o cd-hit-2d
+
+ cd-hit-est: cdhit-common.o cdhit-utility.o cdhit-est.o
+- $(CC) $(CCFLAGS) cdhit-est.o cdhit-common.o cdhit-utility.o $(LDFLAGS) cd-hit-est
++ $(CXX) $(my_CXXFLAGS) $(CXXFLAGS) $(LDFLAGS) cdhit-est.o cdhit-common.o cdhit-utility.o -o cd-hit-est
+
+ cd-hit-est-2d: cdhit-common.o cdhit-utility.o cdhit-est-2d.o
+- $(CC) $(CCFLAGS) cdhit-est-2d.o cdhit-common.o cdhit-utility.o $(LDFLAGS) cd-hit-est-2d
++ $(CXX) $(my_CXXFLAGS) $(CXXFLAGS) $(LDFLAGS) cdhit-est-2d.o cdhit-common.o cdhit-utility.o -o cd-hit-est-2d
+
+ cd-hit-div: cdhit-common.o cdhit-utility.o cdhit-div.o
+- $(CC) $(CCFLAGS) cdhit-div.o cdhit-common.o cdhit-utility.o $(LDFLAGS) cd-hit-div
++ $(CXX) $(my_CXXFLAGS) $(CXXFLAGS) $(LDFLAGS) cdhit-div.o cdhit-common.o cdhit-utility.o -o cd-hit-div
+
+ cd-hit-454: cdhit-common.o cdhit-utility.o cdhit-454.o
+- $(CC) $(CCFLAGS) cdhit-454.o cdhit-common.o cdhit-utility.o $(LDFLAGS) cd-hit-454
++ $(CXX) $(my_CXXFLAGS) $(CXXFLAGS) $(LDFLAGS) cdhit-454.o cdhit-common.o cdhit-utility.o -o cd-hit-454
+
+ # objects
+ cdhit-common.o: cdhit-common.c++ cdhit-common.h
+- $(CC) $(CCFLAGS) cdhit-common.c++ -c
++ $(CXX) $(my_CXXFLAGS) $(CXXFLAGS) $(my_CPPFLAGS) $(CPPFLAGS) cdhit-common.c++ -c
+
+ cdhit-utility.o: cdhit-utility.c++ cdhit-utility.h
+- $(CC) $(CCFLAGS) cdhit-utility.c++ -c
++ $(CXX) $(my_CXXFLAGS) $(CXXFLAGS) $(my_CPPFLAGS) $(CPPFLAGS) cdhit-utility.c++ -c
+
+ cdhit.o: cdhit.c++ cdhit-utility.h
+- $(CC) $(CCFLAGS) cdhit.c++ -c
++ $(CXX) $(my_CXXFLAGS) $(CXXFLAGS) $(my_CPPFLAGS) $(CPPFLAGS) cdhit.c++ -c
+
+ cdhit-2d.o: cdhit-2d.c++ cdhit-utility.h
+- $(CC) $(CCFLAGS) cdhit-2d.c++ -c
++ $(CXX) $(my_CXXFLAGS) $(CXXFLAGS) $(my_CPPFLAGS) $(CPPFLAGS) cdhit-2d.c++ -c
+
+ cdhit-est.o: cdhit-est.c++ cdhit-utility.h
+- $(CC) $(CCFLAGS) cdhit-est.c++ -c
++ $(CXX) $(my_CXXFLAGS) $(CXXFLAGS) $(my_CPPFLAGS) $(CPPFLAGS) cdhit-est.c++ -c
+
+ cdhit-est-2d.o: cdhit-est-2d.c++ cdhit-utility.h
+- $(CC) $(CCFLAGS) cdhit-est-2d.c++ -c
++ $(CXX) $(my_CXXFLAGS) $(CXXFLAGS) $(my_CPPFLAGS) $(CPPFLAGS) cdhit-est-2d.c++ -c
+
+ cdhit-div.o: cdhit-div.c++ cdhit-common.h
+- $(CC) $(CCFLAGS) cdhit-div.c++ -c
++ $(CXX) $(my_CXXFLAGS) $(CXXFLAGS) $(my_CPPFLAGS) $(CPPFLAGS) cdhit-div.c++ -c
+
+ cdhit-454.o: cdhit-454.c++ cdhit-common.h
+- $(CC) $(CCFLAGS) cdhit-454.c++ -c
++ $(CXX) $(my_CXXFLAGS) $(CXXFLAGS) $(my_CPPFLAGS) $(CPPFLAGS) cdhit-454.c++ -c
+
+ PREFIX ?= /usr/local/bin
+
+ install:
+ for prog in $(PROGS); do \
+- install -m 0755 $$prog $(PREFIX); \
++ install -m 0755 $$prog $(DESTDIR)$(PREFIX); \
+ done
+- install -m 0755 *.pl $(PREFIX);
++ install -m 0755 *.pl $(DESTDIR)$(PREFIX);
diff --git a/sci-biology/cd-hit/files/cd-hit-4.6.6-fix-perl-shebangs.patch b/sci-biology/cd-hit/files/cd-hit-4.6.6-fix-perl-shebangs.patch
new file mode 100644
index 000000000000..3784296f2e94
--- /dev/null
+++ b/sci-biology/cd-hit/files/cd-hit-4.6.6-fix-perl-shebangs.patch
@@ -0,0 +1,219 @@
+Make perl shebangs more Prefix friendly
+See also: https://blogs.gentoo.org/mgorny/2016/02/08/a-quick-note-on-portable-shebangs/
+
+--- a/cd-hit-2d-para.pl
++++ b/cd-hit-2d-para.pl
+@@ -1,4 +1,4 @@
+-#!/usr/bin/perl -w
++#!/usr/bin/env perl
+ # =============================================================================
+ # CD-HIT
+ # http://cd-hit.org/
+--- a/cd-hit-div.pl
++++ b/cd-hit-div.pl
+@@ -1,4 +1,4 @@
+-#!/usr/bin/perl
++#!/usr/bin/env perl
+
+ #not like cd-hit-div, this script do not sort input
+ #or throw away seq
+--- a/cd-hit-para.pl
++++ b/cd-hit-para.pl
+@@ -1,4 +1,4 @@
+-#!/usr/bin/perl -w
++#!/usr/bin/env perl
+ # =============================================================================
+ # CD-HIT
+ # http://cd-hit.org/
+--- a/clstr2tree.pl
++++ b/clstr2tree.pl
+@@ -1,4 +1,4 @@
+-#!/usr/bin/perl
++#!/usr/bin/env perl
+
+ $clstr = shift;
+ $fr = shift; # for nr80.clstr $fr = 0.8
+--- a/clstr2txt.pl
++++ b/clstr2txt.pl
+@@ -1,4 +1,4 @@
+-#!/usr/bin/perl
++#!/usr/bin/env perl
+
+ my $no = 0;
+ my $clstr_no = "";
+--- a/clstr2xml.pl
++++ b/clstr2xml.pl
+@@ -1,4 +1,4 @@
+-#!/usr/bin/perl
++#!/usr/bin/env perl
+
+ #usage: clstr_xml.pl [-len|-size] level1.clstr [level2.clstr level3.clstr ...]
+ #purpose: to create xml file from cd-hit or hierarchical cd-hit(h-cd-hit) results
+--- a/clstr_cut.pl
++++ b/clstr_cut.pl
+@@ -1,4 +1,4 @@
+-#!/usr/bin/perl
++#!/usr/bin/env perl
+
+ #keep only top $no proteins in cluster
+
+--- a/clstr_merge_noorder.pl
++++ b/clstr_merge_noorder.pl
+@@ -1,4 +1,4 @@
+-#!/usr/bin/perl
++#!/usr/bin/env perl
+
+ # order of clusters don't need to be the same
+ # but then I have to read everything into memory
+--- a/clstr_merge.pl
++++ b/clstr_merge.pl
+@@ -1,4 +1,4 @@
+-#!/usr/bin/perl
++#!/usr/bin/env perl
+
+ # the order of clusters need to be identical
+ my ($master_clstr, @clstr) = @ARGV;
+--- a/clstr_quality_eval_by_link.pl
++++ b/clstr_quality_eval_by_link.pl
+@@ -1,4 +1,4 @@
+-#!/usr/bin/perl
++#!/usr/bin/env perl
+
+ ## calculate the sensitivity and specificity of clusters
+ ## if the input fasta file has pre-defined classification term
+--- a/clstr_quality_eval.pl
++++ b/clstr_quality_eval.pl
+@@ -1,4 +1,4 @@
+-#!/usr/bin/perl
++#!/usr/bin/env perl
+
+ ## calculate the sensitivity and specificity of clusters
+ ## if the input fasta file has pre-defined classification term
+--- a/clstr_reduce.pl
++++ b/clstr_reduce.pl
+@@ -1,4 +1,4 @@
+-#!/usr/bin/perl
++#!/usr/bin/env perl
+
+
+ $file90 = shift;
+--- a/clstr_renumber.pl
++++ b/clstr_renumber.pl
+@@ -1,4 +1,4 @@
+-#!/usr/bin/perl
++#!/usr/bin/env perl
+ $no = 0;
+ while($ll=<>){
+ if ($ll =~ /^>Cluster (\d+)/) {
+--- a/clstr_rep.pl
++++ b/clstr_rep.pl
+@@ -1,4 +1,4 @@
+-#!/usr/bin/perl
++#!/usr/bin/env perl
+
+ $rep = "";
+ $no = 0;
+--- a/clstr_reps_faa_rev.pl
++++ b/clstr_reps_faa_rev.pl
+@@ -1,4 +1,4 @@
+-#!/usr/bin/perl
++#!/usr/bin/env perl
+ # output single fasta file
+ # for each cluster output at least $cutoff seqs
+
+--- a/clstr_rev.pl
++++ b/clstr_rev.pl
+@@ -1,4 +1,4 @@
+-#!/usr/bin/perl
++#!/usr/bin/env perl
+ # if nr90 from nr100 and
+ # nr80 from nr90, so I have nr90.clstr and nr80.clstr
+ # but, in nr80.clstr, some gi numbers whose from nr100 are there
+--- a/clstr_select.pl
++++ b/clstr_select.pl
+@@ -1,4 +1,4 @@
+-#!/usr/bin/perl
++#!/usr/bin/env perl
+
+ #my $by = shift;
+ my $min;
+--- a/clstr_select_rep.pl
++++ b/clstr_select_rep.pl
+@@ -1,4 +1,4 @@
+-#!/usr/bin/perl
++#!/usr/bin/env perl
+
+ #my $by = shift;
+ my $min;
+--- a/clstr_size_histogram.pl
++++ b/clstr_size_histogram.pl
+@@ -1,4 +1,4 @@
+-#!/usr/bin/perl
++#!/usr/bin/env perl
+
+ if(@ARGV==0){
+ print "Usage:\n\tclstr_size_histogram.pl [-bin N] clstr_file\n";
+--- a/clstr_size_stat.pl
++++ b/clstr_size_stat.pl
+@@ -1,4 +1,4 @@
+-#!/usr/bin/perl
++#!/usr/bin/env perl
+
+ if(@ARGV==0){
+ print "Usage:\n\tclstr_size_stat.pl clstr_file\n";
+--- a/clstr_sort_by.pl
++++ b/clstr_sort_by.pl
+@@ -1,4 +1,4 @@
+-#!/usr/bin/perl
++#!/usr/bin/env perl
+
+ my $sort_by_what = shift;
+ $sort_by_what = "no" unless $sort_by_what;
+--- a/clstr_sort_prot_by.pl
++++ b/clstr_sort_prot_by.pl
+@@ -1,4 +1,4 @@
+-#!/usr/bin/perl
++#!/usr/bin/env perl
+
+ my $sort_by = shift;
+ $sort_by = "len" unless ($sort_by);
+--- a/clstr_sql_tbl.pl
++++ b/clstr_sql_tbl.pl
+@@ -1,4 +1,4 @@
+-#!/usr/bin/perl
++#!/usr/bin/env perl
+
+ if(@ARGV==0){
+ print "Usage:\n\tclstr_sql_tbl.pl clstr_file tbl_file\n";
+--- a/clstr_sql_tbl_sort.pl
++++ b/clstr_sql_tbl_sort.pl
+@@ -1,4 +1,4 @@
+-#!/usr/bin/perl
++#!/usr/bin/env perl
+
+ if(@ARGV==0){
+ print "Usage:\n\tclstr_sql_tbl_sort.pl table_file level\n";
+--- a/make_multi_seq.pl
++++ b/make_multi_seq.pl
+@@ -1,4 +1,4 @@
+-#!/usr/bin/perl
++#!/usr/bin/env perl
+
+ #note you have to use "-d 0" in the cd-hit run
+ #note you better to use "-g 1" in the cd-hit run
+--- a/plot_2d.pl
++++ b/plot_2d.pl
+@@ -1,4 +1,4 @@
+-#!/usr/bin/perl
++#!/usr/bin/env perl
+
+ use Image::Magick;
+
+--- a/plot_len1.pl
++++ b/plot_len1.pl
+@@ -1,4 +1,4 @@
+-#!/usr/bin/perl
++#!/usr/bin/env perl
+
+ $file90 = shift;
+ $segs = shift;
diff --git a/sci-biology/cd-hit/metadata.xml b/sci-biology/cd-hit/metadata.xml
new file mode 100644
index 000000000000..1f3b075216dd
--- /dev/null
+++ b/sci-biology/cd-hit/metadata.xml
@@ -0,0 +1,27 @@
+<?xml version='1.0' encoding='UTF-8'?>
+<!DOCTYPE pkgmetadata SYSTEM "http://www.gentoo.org/dtd/metadata.dtd">
+<pkgmetadata>
+ <maintainer type="project">
+ <email>sci-biology@gentoo.org</email>
+ <name>Gentoo Biology Project</name>
+ </maintainer>
+ <longdescription>
+CD-HIT is a very widely used program for clustering and comparing large sets
+of protein or nucleotide sequences. CD-HIT is very fast and can handle
+extremely large databases. CD-HIT helps to significantly reduce the
+computational and manual efforts in many sequence analysis tasks and aids in
+understanding the data structure and correct the bias within a dataset.
+The CD-HIT package has CD-HIT, CD-HIT-2D, CD-HIT-EST, CD-HIT-EST-2D,
+CD-HIT-454, CD-HIT-PARA, PSI-CD-HIT and over a dozen scripts. CD-HIT
+(CD-HIT-EST) clusters similar proteins (DNAs) into clusters that meet a
+user-defined similarity threshold. CD-HIT-2D (CD-HIT-EST-2D) compares 2
+datasets and identifies the sequences in db2 that are similar to db1 above
+a threshold. CD-HIT-454 is a program to identify natural and artificial
+duplicates from pyrosequencing reads. The usage of other programs and
+scripts can be found in CD-HIT user's guide.
+ </longdescription>
+ <upstream>
+ <remote-id type="google-code">cdhit</remote-id>
+ <remote-id type="github">weizhongli/cdhit</remote-id>
+ </upstream>
+</pkgmetadata>