summary refs log tree commit diff
path: root/dev-util/Tensile
diff options
context:
space:
mode:
author V3n3RiX <venerix@koprulu.sector> 2022-03-20 00:40:44 +0000
committer V3n3RiX <venerix@koprulu.sector> 2022-03-20 00:40:44 +0000
commit 4cbcc855382a06088e2f016f62cafdbcb7e40665 (patch)
tree 356496503d52354aa6d9f2d36126302fed5f3a73 /dev-util/Tensile
parent fcc5224904648a8e6eb528d7603154160a20022f (diff)
gentoo resync : 20.03.2022
Diffstat (limited to 'dev-util/Tensile')
-rw-r--r-- dev-util/Tensile/Manifest 5
-rw-r--r-- dev-util/Tensile/Tensile-4.3.0.ebuild 74
-rw-r--r-- dev-util/Tensile/files/Tensile-4.3.0-hsaco-compile-specified-arch.patch 96
-rw-r--r-- dev-util/Tensile/files/Tensile-4.3.0-output-commands.patch 23
-rw-r--r-- dev-util/Tensile/metadata.xml 11
5 files changed, 209 insertions, 0 deletions
diff --git a/dev-util/Tensile/Manifest b/dev-util/Tensile/Manifest
new file mode 100644
index 000000000000..b873575497a2
--- /dev/null
+++ b/dev-util/Tensile/Manifest
@@ -0,0 +1,5 @@
+AUX Tensile-4.3.0-hsaco-compile-specified-arch.patch 3859 BLAKE2B 4590978cff0fdb0501ba2405615775cb3f8900ce164c3d57cc59dfe82e1adc25376b6463855a045f5e0652cc985b0015566b95ff28327c59f2db875a84cb49b3 SHA512 57520f1d46f46c7cfd5b8c3ae307da07695c19479b687ce994ad9fba2e755ed7c776c3d8bee8e0642c7613a5d53d8469f329b1eb07563c13d2f7bda8b3d36687
+AUX Tensile-4.3.0-output-commands.patch 1629 BLAKE2B c43395d8e8aeb956d5eb84c8a34ad4b995ed32499ceababeb22d5ca3ac5ee6e8ef353a1467a77943a441f0840b9a6ad1f53202ec8673ef3753aa2bec1395fec4 SHA512 45bdae1d87f6e31d337b22f146b9dee25f4f87357c6ac52bd843556dd742431db10efb55fb8ef815cc9941b5b9f05970d43a6fec0f2d9195c30c633e33d138bf
+DIST rocm-Tensile-4.3.0.tar.gz 14250149 BLAKE2B 030138eaca2a0aadd96801e6dbd72e510716dd90553ef3795c5e04e00a34a05ecae82b24f755e4033a4acfcdb1cf26291da1e7902bb090f89a010d403e832beb SHA512 126db0b413c716fba8d5be9bff7a44fd1badacbf32f3db8d0db649819177db37ebd56fd22dd3c809655f5d29675be115e698cd10bc3d0b4b23878ae3726fce47
+EBUILD Tensile-4.3.0.ebuild 1995 BLAKE2B 66474ff936214aeede2613b80522a2bd376216115b7c0cdbb046443b81146226d8ca0c2ea7c4f28661dfd1ed0e94e9e225211acc1a21ba8f74107819cb00ca8b SHA512 ca29d0652e6ee50f287def381ea44cb4e6657a73e03e4c030c24b9ddf8ea95256e11891896e14b6498c4b9eebc5d20388045927c701fc62be7dcdcd67c385b11
+MISC metadata.xml 349 BLAKE2B 363f31b1d597e4c4959c588c9765154049c7c9692bbd64a0736226261b6232c75af7cbf255de59813cd10b9d45b30cf08d4ad0eb1422bf956264377a58002340 SHA512 d83250993581f9b23b43b8553cf46fe75411232ca7ec18c32c070d6acb0f9c7a01769755a3f11b4c5aafe40a63ee0c3817fc45dd01325ce02bbf557d5ab0cdea
diff --git a/dev-util/Tensile/Tensile-4.3.0.ebuild b/dev-util/Tensile/Tensile-4.3.0.ebuild
new file mode 100644
index 000000000000..cf33fb19cf10
--- /dev/null
+++ b/dev-util/Tensile/Tensile-4.3.0.ebuild
@@ -0,0 +1,74 @@
+# Copyright 1999-2022 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
+
+EAPI=8
+
+# Python 3.8 through 3.10 are accepted; the package is built through the
+# PEP 517 setuptools backend provided by distutils-r1.
+PYTHON_COMPAT=( python3_{8..10} )
+DISTUTILS_USE_PEP517=setuptools
+inherit distutils-r1
+
+DESCRIPTION="Stretching GPU performance for GEMMs and tensor contractions"
+HOMEPAGE="https://github.com/ROCmSoftwarePlatform/Tensile"
+# The upstream tag archive is renamed on download so the distfile name carries
+# the project name.
+SRC_URI="https://github.com/ROCmSoftwarePlatform/Tensile/archive/rocm-${PV}.tar.gz -> rocm-Tensile-${PV}.tar.gz"
+# Build directory inside WORKDIR: <PN>-rocm-<PV>.
+S="${WORKDIR}/${PN}-rocm-${PV}"
+
+LICENSE="MIT"
+KEYWORDS="~amd64"
+# Subslot is the first two version components (major.minor) of PV.
+SLOT="0/$(ver_cut 1-2)"
+
+# Not compatible with recent versions of pytest
+RESTRICT="test"
+
+# Runtime: the selected Python implementation plus the yaml and msgpack modules.
+RDEPEND="${PYTHON_DEPS}
+ dev-python/pyyaml[${PYTHON_USEDEP}]
+ dev-python/msgpack[${PYTHON_USEDEP}]
+"
+# Build time additionally needs hip from the same slot/subslot as this package.
+DEPEND="${RDEPEND}
+ dev-util/hip:${SLOT}
+"
+# rocminfo (same slot/subslot) is only pulled in when USE=test is set.
+BDEPEND="test? (
+ dev-util/rocminfo:${SLOT}
+)"
+
+# Applied from the files/ directory; the second entry spells the version out
+# literally instead of using ${PV}.
+PATCHES=(
+ "${FILESDIR}/Tensile-${PV}-hsaco-compile-specified-arch.patch" # backported from upstream, should remove after 4.3.0
+ "${FILESDIR}/Tensile-4.3.0-output-commands.patch"
+)
+
+# Location src_prepare moves the bundled Source tree to.
+# NOTE(review): only distutils-r1 is inherited above and nothing in this
+# ebuild reads this variable - confirm whether it is still needed.
+CMAKE_USE_DIR="${WORKDIR}/Source"
+
+# Select pytest as the test runner (tests are currently restricted above).
+distutils_enable_tests pytest
+
+src_prepare() {
+ # Run the eclass preparation first (this is the step that handles PATCHES),
+ # so the patches still see the upstream directory layout.
+ distutils-r1_src_prepare
+
+ # Move the bundled Source tree out of the Python package into WORKDIR;
+ # src_install copies it to /usr/share/${PN} from there.
+ mv "${PN}"/Source "${WORKDIR}"/ || die
+ # On the ROCM_SMI_ROOT line(s), replace the first "lib" with the real libdir.
+ sed -e "/ROCM_SMI_ROOT/s,lib,$(get_libdir)," \
+ -i "${WORKDIR}"/Source/cmake/FindROCmSMI.cmake || die
+ # On the TENSILE_USE_LLVM line(s), turn the first ON into OFF.
+ sed -r -e "/TENSILE_USE_LLVM/s/ON/OFF/" \
+ -i "${WORKDIR}"/Source/CMakeLists.txt || die
+
+ # Common.py: replace the literal 0,0,0 on the HipClangVersion line with the
+ # package version (separators turned into commas), and replace the
+ # os.path.join(...) expression on the SourcePath line with the install path.
+ sed -e "/HipClangVersion/s/0,0,0/$(ver_rs 1-3 ,)/" \
+ -e "/SourcePath/s,os\.path\.join.*$,\"${EPREFIX}/usr/share/${PN}\"," \
+ -i "${PN}"/Common.py || die
+
+ # __init__.py: point at the same install path. ${PN} is used here as well so
+ # this rewrite cannot drift from the Common.py one.
+ sed -e "s|os\.path\.dirname.*$|\"${EPREFIX}/usr/share/${PN}\", end='')|" \
+ -i "${PN}"/__init__.py || die
+}
+
+src_test() {
+ # Export ROCM_PATH for the test run only; the local export goes away when
+ # this phase function returns.
+ local -x ROCM_PATH="${EPREFIX}/usr/"
+ distutils-r1_src_test
+}
+
+src_install() {
+ local staged_cmake="${ED}/usr/cmake"
+ local cmake_dest="${ED}/usr/$(get_libdir)/cmake/${PN}"
+ local share_dir="/usr/share/${PN}"
+
+ distutils-r1_src_install
+
+ # Relocate the cmake files from /usr/cmake in the image to the
+ # libdir-specific cmake/${PN} directory, then remove the old directory.
+ mkdir -p "${cmake_dest}" || die
+ mv "${staged_cmake}/"* "${cmake_dest}" || die
+ rm -r "${staged_cmake}" || die
+
+ # Install the contents of the Source tree under /usr/share/${PN} and add a
+ # "Source" symlink there that points back at the directory itself.
+ insinto "${share_dir}"
+ doins -r "${WORKDIR}"/Source/*
+ dosym . "${share_dir}/Source"
+}
diff --git a/dev-util/Tensile/files/Tensile-4.3.0-hsaco-compile-specified-arch.patch b/dev-util/Tensile/files/Tensile-4.3.0-hsaco-compile-specified-arch.patch
new file mode 100644
index 000000000000..8e6753781a2a
--- /dev/null
+++ b/dev-util/Tensile/files/Tensile-4.3.0-hsaco-compile-specified-arch.patch
@@ -0,0 +1,96 @@
+https://github.com/ROCmSoftwarePlatform/Tensile/issues/1395
+https://github.com/ROCmSoftwarePlatform/Tensile/pull/1398
+
+--- a/Tensile/TensileCreateLibrary.py
++++ b/Tensile/TensileCreateLibrary.py
+@@ -136,6 +136,35 @@ def which(p):
+ return candidate
+ return None
+
++def splitArchs():
++ # Helper for architecture
++ def isSupported(arch):
++ return globalParameters["AsmCaps"][arch]["SupportedISA"] and \
++ globalParameters["AsmCaps"][arch]["SupportedSource"]
++
++ if ";" in globalParameters["Architecture"]:
++ wantedArchs = globalParameters["Architecture"].split(";")
++ else:
++ wantedArchs = globalParameters["Architecture"].split("_")
++ archs = []
++ cmdlineArchs = []
++ if "all" in wantedArchs:
++ for arch in globalParameters['SupportedISA']:
++ if isSupported(arch):
++ if (arch == (9,0,6) or arch == (9,0,8) or arch == (9,0,10)):
++ if (arch == (9,0,10)):
++ archs += [gfxName(arch) + '-xnack+']
++ cmdlineArchs += [gfxName(arch) + ':xnack+']
++ archs += [gfxName(arch) + '-xnack-']
++ cmdlineArchs += [gfxName(arch) + ':xnack-']
++ else:
++ archs += [gfxName(arch)]
++ cmdlineArchs += [gfxName(arch)]
++ else:
++ for arch in wantedArchs:
++ archs += [re.sub(":", "-", arch)]
++ cmdlineArchs += [arch]
++ return archs, cmdlineArchs
+
+ def buildSourceCodeObjectFile(CxxCompiler, outputPath, kernelFile):
+ buildPath = ensurePath(os.path.join(globalParameters['WorkingPath'], 'code_object_tmp'))
+@@ -149,24 +178,8 @@ def buildSourceCodeObjectFile(CxxCompiler, outputPath, kernelFile):
+ objectFilename = base + '.o'
+ soFilename = base + '.so'
+
+- def isSupported(arch):
+- return globalParameters["AsmCaps"][arch]["SupportedISA"] and \
+- globalParameters["AsmCaps"][arch]["SupportedSource"]
+-
+ if (CxxCompiler == "hipcc"):
+- archs = []
+- cmdlineArchs = []
+- for arch in globalParameters['SupportedISA']:
+- if isSupported(arch):
+- if (arch == (9,0,6) or arch == (9,0,8) or arch == (9,0,10)):
+- if (arch == (9,0,10)):
+- archs += [gfxName(arch) + '-xnack+']
+- cmdlineArchs += [gfxName(arch) + ':xnack+']
+- archs += [gfxName(arch) + '-xnack-']
+- cmdlineArchs += [gfxName(arch) + ':xnack-']
+- else:
+- archs += [gfxName(arch)]
+- cmdlineArchs += [gfxName(arch)]
++ archs, cmdlineArchs = splitArchs()
+
+ archFlags = ['--offload-arch=' + arch for arch in cmdlineArchs]
+
+@@ -1063,11 +1076,6 @@ def buildObjectFileNames(solutionWriter, kernelWriterSource, kernelWriterAssembl
+ sourceKernels = list([k for k in kernels if k['KernelLanguage'] == 'Source'])
+ asmKernels = list([k for k in kernels if k['KernelLanguage'] == 'Assembly'])
+
+- # Helper for architecture
+- def isSupported(arch):
+- return globalParameters["AsmCaps"][arch]["SupportedISA"] and \
+- globalParameters["AsmCaps"][arch]["SupportedSource"]
+-
+ # Build a list of kernel object names.
+ for kernel in sourceKernels:
+ sourceKernelNames += [kernelWriterSource.getKernelFileBase(kernel)]
+@@ -1081,15 +1089,7 @@ def buildObjectFileNames(solutionWriter, kernelWriterSource, kernelWriterAssembl
+
+ # Source based kernels are built for all supported architectures
+ if (cxxCompiler == 'hipcc'):
+- sourceArchs = []
+- for arch in globalParameters['SupportedISA']:
+- if isSupported(arch):
+- if (arch == (9,0,6) or arch == (9,0,8) or arch == (9,0,10)):
+- if (arch == (9,0,10)):
+- sourceArchs += [gfxName(arch) + '-xnack+']
+- sourceArchs += [gfxName(arch) + '-xnack-']
+- else:
+- sourceArchs += [gfxName(arch)]
++ sourceArchs, _ = splitArchs()
+ else:
+ raise RuntimeError("Unknown compiler %s" % cxxCompiler)
+
diff --git a/dev-util/Tensile/files/Tensile-4.3.0-output-commands.patch b/dev-util/Tensile/files/Tensile-4.3.0-output-commands.patch
new file mode 100644
index 000000000000..be5a4db21429
--- /dev/null
+++ b/dev-util/Tensile/files/Tensile-4.3.0-output-commands.patch
@@ -0,0 +1,23 @@
+diff --color -uprN orig/Tensile/cmake/TensileConfig.cmake Tensile-rocm-4.3.0/Tensile/cmake/TensileConfig.cmake
+--- orig/Tensile/cmake/TensileConfig.cmake 2021-08-18 17:48:00.115478470 +0800
++++ Tensile-rocm-4.3.0/Tensile/cmake/TensileConfig.cmake 2021-08-18 17:48:49.963478801 +0800
+@@ -234,6 +234,7 @@ function(TensileCreateLibraryFiles
+ COMMENT "Generating Tensile Libraries"
+ OUTPUT ${Tensile_EMBED_LIBRARY_SOURCE};${Tensile_MANIFEST_CONTENTS}
+ COMMAND ${CommandLine}
++ USES_TERMINAL
+ )
+
+ set("${Tensile_VAR_PREFIX}_ALL_FILES" ${Tensile_MANIFEST_CONTENTS} PARENT_SCOPE)
+diff --color -uprN orig/Tensile/Common.py Tensile-rocm-4.3.0/Tensile/Common.py
+--- orig/Tensile/Common.py 2021-08-18 17:48:00.075478470 +0800
++++ Tensile-rocm-4.3.0/Tensile/Common.py 2021-08-18 17:48:23.287478624 +0800
+@@ -179,7 +179,7 @@ globalParameters["PrintTensorD"] = 0
+ globalParameters["PrintTensorRef"] = 0 # Print reference tensor. 0x1=after init; 0x2=after copy-back; 0x3=both
+ globalParameters["PrintIndexAssignments"] = 0 # Print the tensor index assignment info
+ globalParameters["PrintWinnersOnly"] = False # Only print the solutions which become the fastest
+-globalParameters["PrintCodeCommands"] = False # print the commands used to generate the code objects (asm,link,hip-clang, etc)
++globalParameters["PrintCodeCommands"] = True # print the commands used to generate the code objects (asm,link,hip-clang, etc)
+ globalParameters["DumpTensors"] = False # If True, dump tensors to binary files instead of printing them.
+
+ # TODO - remove this when NewClient is mainstream
diff --git a/dev-util/Tensile/metadata.xml b/dev-util/Tensile/metadata.xml
new file mode 100644
index 000000000000..9bbebec502ca
--- /dev/null
+++ b/dev-util/Tensile/metadata.xml
@@ -0,0 +1,11 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE pkgmetadata SYSTEM 'http://www.gentoo.org/dtd/metadata.dtd'>
+<pkgmetadata>
+ <maintainer type="project">
+ <email>sci@gentoo.org</email>
+ <name>Science Project</name>
+ </maintainer>
+ <upstream>
+ <remote-id type="github">ROCmSoftwarePlatform/Tensile</remote-id>
+ </upstream>
+</pkgmetadata>