diff options
Diffstat (limited to 'sci-libs/rocBLAS/files')
5 files changed, 207 insertions, 0 deletions
diff --git a/sci-libs/rocBLAS/files/Tensile-4.3.0-hsaco-compile-specified-arch.patch b/sci-libs/rocBLAS/files/Tensile-4.3.0-hsaco-compile-specified-arch.patch new file mode 100644 index 000000000000..8e6753781a2a --- /dev/null +++ b/sci-libs/rocBLAS/files/Tensile-4.3.0-hsaco-compile-specified-arch.patch @@ -0,0 +1,96 @@ +https://github.com/ROCmSoftwarePlatform/Tensile/issues/1395 +https://github.com/ROCmSoftwarePlatform/Tensile/pull/1398 + +--- a/Tensile/TensileCreateLibrary.py ++++ b/Tensile/TensileCreateLibrary.py +@@ -136,6 +136,35 @@ def which(p): + return candidate + return None + ++def splitArchs(): ++ # Helper for architecture ++ def isSupported(arch): ++ return globalParameters["AsmCaps"][arch]["SupportedISA"] and \ ++ globalParameters["AsmCaps"][arch]["SupportedSource"] ++ ++ if ";" in globalParameters["Architecture"]: ++ wantedArchs = globalParameters["Architecture"].split(";") ++ else: ++ wantedArchs = globalParameters["Architecture"].split("_") ++ archs = [] ++ cmdlineArchs = [] ++ if "all" in wantedArchs: ++ for arch in globalParameters['SupportedISA']: ++ if isSupported(arch): ++ if (arch == (9,0,6) or arch == (9,0,8) or arch == (9,0,10)): ++ if (arch == (9,0,10)): ++ archs += [gfxName(arch) + '-xnack+'] ++ cmdlineArchs += [gfxName(arch) + ':xnack+'] ++ archs += [gfxName(arch) + '-xnack-'] ++ cmdlineArchs += [gfxName(arch) + ':xnack-'] ++ else: ++ archs += [gfxName(arch)] ++ cmdlineArchs += [gfxName(arch)] ++ else: ++ for arch in wantedArchs: ++ archs += [re.sub(":", "-", arch)] ++ cmdlineArchs += [arch] ++ return archs, cmdlineArchs + + def buildSourceCodeObjectFile(CxxCompiler, outputPath, kernelFile): + buildPath = ensurePath(os.path.join(globalParameters['WorkingPath'], 'code_object_tmp')) +@@ -149,24 +178,8 @@ def buildSourceCodeObjectFile(CxxCompiler, outputPath, kernelFile): + objectFilename = base + '.o' + soFilename = base + '.so' + +- def isSupported(arch): +- return globalParameters["AsmCaps"][arch]["SupportedISA"] and \ +- globalParameters["AsmCaps"][arch]["SupportedSource"] +- + if (CxxCompiler == "hipcc"): +- archs = [] +- cmdlineArchs = [] +- for arch in globalParameters['SupportedISA']: +- if isSupported(arch): +- if (arch == (9,0,6) or arch == (9,0,8) or arch == (9,0,10)): +- if (arch == (9,0,10)): +- archs += [gfxName(arch) + '-xnack+'] +- cmdlineArchs += [gfxName(arch) + ':xnack+'] +- archs += [gfxName(arch) + '-xnack-'] +- cmdlineArchs += [gfxName(arch) + ':xnack-'] +- else: +- archs += [gfxName(arch)] +- cmdlineArchs += [gfxName(arch)] ++ archs, cmdlineArchs = splitArchs() + + archFlags = ['--offload-arch=' + arch for arch in cmdlineArchs] + +@@ -1063,11 +1076,6 @@ def buildObjectFileNames(solutionWriter, kernelWriterSource, kernelWriterAssembl + sourceKernels = list([k for k in kernels if k['KernelLanguage'] == 'Source']) + asmKernels = list([k for k in kernels if k['KernelLanguage'] == 'Assembly']) + +- # Helper for architecture +- def isSupported(arch): +- return globalParameters["AsmCaps"][arch]["SupportedISA"] and \ +- globalParameters["AsmCaps"][arch]["SupportedSource"] +- + # Build a list of kernel object names. + for kernel in sourceKernels: + sourceKernelNames += [kernelWriterSource.getKernelFileBase(kernel)] +@@ -1081,15 +1089,7 @@ def buildObjectFileNames(solutionWriter, kernelWriterSource, kernelWriterAssembl + + # Source based kernels are built for all supported architectures + if (cxxCompiler == 'hipcc'): +- sourceArchs = [] +- for arch in globalParameters['SupportedISA']: +- if isSupported(arch): +- if (arch == (9,0,6) or arch == (9,0,8) or arch == (9,0,10)): +- if (arch == (9,0,10)): +- sourceArchs += [gfxName(arch) + '-xnack+'] +- sourceArchs += [gfxName(arch) + '-xnack-'] +- else: +- sourceArchs += [gfxName(arch)] ++ sourceArchs, _ = splitArchs() + else: + raise RuntimeError("Unknown compiler %s" % cxxCompiler) + diff --git a/sci-libs/rocBLAS/files/Tensile-4.3.0-output-commands.patch b/sci-libs/rocBLAS/files/Tensile-4.3.0-output-commands.patch new file mode 100644 index 000000000000..be5a4db21429 --- /dev/null +++ b/sci-libs/rocBLAS/files/Tensile-4.3.0-output-commands.patch @@ -0,0 +1,23 @@ +diff --color -uprN orig/Tensile/cmake/TensileConfig.cmake Tensile-rocm-4.3.0/Tensile/cmake/TensileConfig.cmake +--- orig/Tensile/cmake/TensileConfig.cmake 2021-08-18 17:48:00.115478470 +0800 ++++ Tensile-rocm-4.3.0/Tensile/cmake/TensileConfig.cmake 2021-08-18 17:48:49.963478801 +0800 +@@ -234,6 +234,7 @@ function(TensileCreateLibraryFiles + COMMENT "Generating Tensile Libraries" + OUTPUT ${Tensile_EMBED_LIBRARY_SOURCE};${Tensile_MANIFEST_CONTENTS} + COMMAND ${CommandLine} ++ USES_TERMINAL + ) + + set("${Tensile_VAR_PREFIX}_ALL_FILES" ${Tensile_MANIFEST_CONTENTS} PARENT_SCOPE) +diff --color -uprN orig/Tensile/Common.py Tensile-rocm-4.3.0/Tensile/Common.py +--- orig/Tensile/Common.py 2021-08-18 17:48:00.075478470 +0800 ++++ Tensile-rocm-4.3.0/Tensile/Common.py 2021-08-18 17:48:23.287478624 +0800 +@@ -179,7 +179,7 @@ globalParameters["PrintTensorD"] = 0 + globalParameters["PrintTensorRef"] = 0 # Print reference tensor. 0x1=after init; 0x2=after copy-back; 0x3=both + globalParameters["PrintIndexAssignments"] = 0 # Print the tensor index assignment info + globalParameters["PrintWinnersOnly"] = False # Only print the solutions which become the fastest +-globalParameters["PrintCodeCommands"] = False # print the commands used to generate the code objects (asm,link,hip-clang, etc) ++globalParameters["PrintCodeCommands"] = True # print the commands used to generate the code objects (asm,link,hip-clang, etc) + globalParameters["DumpTensors"] = False # If True, dump tensors to binary files instead of printing them. + + # TODO - remove this when NewClient is mainstream diff --git a/sci-libs/rocBLAS/files/rocBLAS-4.3.0-change-default-Tensile-library-dir.patch b/sci-libs/rocBLAS/files/rocBLAS-4.3.0-change-default-Tensile-library-dir.patch new file mode 100644 index 000000000000..1841424c6e56 --- /dev/null +++ b/sci-libs/rocBLAS/files/rocBLAS-4.3.0-change-default-Tensile-library-dir.patch @@ -0,0 +1,39 @@ +change the default rocm tensile library search path +--- orig/library/src/tensile_host.cpp 2021-08-21 17:56:47.040481580 +0800 ++++ rocBLAS-rocm-4.3.0/library/src/tensile_host.cpp 2021-08-21 17:58:46.360482372 +0800 +@@ -489,34 +489,7 @@ namespace + } + else + { +-#ifndef ROCBLAS_STATIC_LIB +- Dl_info info; +- +- // Find the location of librocblas.so +- // Fall back on hard-coded path if static library or not found +- // [Use a C API (rocblas_sccal) *not* defined in this file to +- // avoid compile-time resolution of the function pointer; cf. +- // https://man7.org/linux/man-pages/man3/dladdr.3.html "BUGS"] +- +- if(dladdr((void*)rocblas_sscal, &info)) +- { +- path = info.dli_fname; +- path = std::string{dirname(&path[0])}; +- } +- else +-#endif +- { +- path = "/opt/rocm/rocblas/lib"; +- } +- +- // Find the location of the libraries +- if(TestPath(path + "/../../Tensile/library")) +- path += "/../../Tensile/library"; +- else +- path += "/library"; +- +- if(TestPath(path + "/" + processor)) +- path += "/" + processor; ++ path="@GENTOO_PORTAGE_EPREFIX@/usr/lib64/rocblas/library"; + } + + // only load modules for the current architecture diff --git a/sci-libs/rocBLAS/files/rocBLAS-4.3.0-fix-glibc-2.32-and-above.patch b/sci-libs/rocBLAS/files/rocBLAS-4.3.0-fix-glibc-2.32-and-above.patch new file mode 100644 index 000000000000..a4d9f0bab344 --- /dev/null +++ b/sci-libs/rocBLAS/files/rocBLAS-4.3.0-fix-glibc-2.32-and-above.patch @@ -0,0 +1,25 @@ +https://sourceware.org/glibc/wiki/Release/2.32#Deprectation_sys_siglist.2C__sys_siglist.2C_sys_sigabbrev + +--- rocBLAS-rocm-4.1.0/clients/gtest/rocblas_test.cpp ++++ rocBLAS-rocm-4.1.0/clients/gtest/rocblas_test.cpp +@@ -173,7 +173,7 @@ void catch_signals_and_exceptions_as_fai + // Set up the return point, and handle siglongjmp returning back to here + if(sigsetjmp(t_handler.sigjmp_buf, true)) + { +- FAIL() << "Received " << sys_siglist[t_handler.signal] << " signal"; ++ FAIL() << "Received " << strsignal(t_handler.signal) << " signal"; + } + else + { + +--- rocBLAS-rocm-4.1.0/clients/include/utility.hpp ++++ rocBLAS-rocm-4.1.0/clients/include/utility.hpp +@@ -39,7 +39,7 @@ + // puts, putchar, fputs, printf, fprintf, vprintf, vfprintf: Use rocblas_cout or rocblas_cerr + // sprintf, vsprintf: Possible buffer overflows; us snprintf or vsnprintf instead + // strerror: Thread-unsafe; use snprintf / dprintf with %m or strerror_* alternatives +-// strsignal: Thread-unsafe; use sys_siglist[signal] instead ++// strsignal: Thread-unsafe; use strsignal(signal) instead + // strtok: Thread-unsafe; use strtok_r + // gmtime, ctime, asctime, localtime: Thread-unsafe + // tmpnam: Thread-unsafe; use mkstemp or related functions instead diff --git a/sci-libs/rocBLAS/files/rocBLAS-4.3.0-link-system-blas.patch b/sci-libs/rocBLAS/files/rocBLAS-4.3.0-link-system-blas.patch new file mode 100644 index 000000000000..184b76d7ef38 --- /dev/null +++ b/sci-libs/rocBLAS/files/rocBLAS-4.3.0-link-system-blas.patch @@ -0,0 +1,24 @@ +Link system blas libraries rather than the downloaded libraries (in install.sh) + +--- orig/clients/benchmarks/CMakeLists.txt ++++ rocBLAS-rocm-4.2.0/clients/benchmarks/CMakeLists.txt +@@ -49,7 +49,7 @@ target_include_directories( rocblas-benc + $<BUILD_INTERFACE:${BLIS_INCLUDE_DIR}> + ) + +-target_link_libraries( rocblas-bench PRIVATE rocblas_fortran_client roc::rocblas lapack cblas ) ++target_link_libraries( rocblas-bench PRIVATE rocblas_fortran_client roc::rocblas lapack cblas -lblas ) + if(LINK_BLIS) + target_link_libraries( rocblas-bench PRIVATE ${BLIS_LIBRARY} ) + endif() +--- orig/clients/gtest/CMakeLists.txt ++++ rocBLAS-rocm-4.2.0/clients/gtest/CMakeLists.txt +@@ -129,7 +129,7 @@ target_include_directories( rocblas-test + $<BUILD_INTERFACE:${BLIS_INCLUDE_DIR}> + ) + +-target_link_libraries( rocblas-test PRIVATE rocblas_fortran_client roc::rocblas lapack cblas ${GTEST_LIBRARIES} ) ++target_link_libraries( rocblas-test PRIVATE rocblas_fortran_client roc::rocblas lapack cblas -lblas ${GTEST_LIBRARIES} ) + if(LINK_BLIS) + target_link_libraries( rocblas-test PRIVATE ${BLIS_LIBRARY} ) + endif() |