Browse Source

Develop Stream: update to ROCm 6.1 (#138)

* Resolve "Update to ROCm 6.1"

* Fixed broken URLs

---------

Co-authored-by: Robin Voetter <robin@streamhpc.com>
pull/142/head
Beatriz Navidad Vilches 1 year ago committed by GitHub
parent
commit
f293afdd7f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 79
      .gitlab-ci.yml
  2. 3
      AI/MIGraphX/Quantization/README.md
  3. 2
      AI/MIGraphX/Quantization/Running-Quantized-ResNet50-via-MIGraphX.md
  4. 2
      Applications/README.md
  5. 114
      Dockerfiles/hip-libraries-cuda-ubuntu.Dockerfile
  6. 33
      Dockerfiles/hip-libraries-rocm-ubuntu.Dockerfile
  7. 4
      External/glad/glad.cpp
  8. 2
      HIP-Basic/README.md
  9. 2
      HIP-Basic/cooperative_groups/README.md
  10. 4
      HIP-Basic/device_query/README.md
  11. 4
      HIP-Basic/device_query/main.cpp
  12. 2
      HIP-Basic/multi_gpu_data_transfer/README.md
  13. 13
      HIP-Basic/texture_management/main.hip
  14. 2
      Libraries/hipBLAS/README.md
  15. 4
      Libraries/hipCUB/README.md
  16. 4
      Libraries/hipSOLVER/README.md
  17. 2
      Libraries/hipSOLVER/syevdx/README.md
  18. 4
      Libraries/rocBLAS/README.md
  19. 4
      Libraries/rocPRIM/README.md
  20. 4
      Libraries/rocRAND/README.md
  21. 8
      Libraries/rocSPARSE/README.md
  22. 2
      Libraries/rocSPARSE/level_2/bsrmv/README.md
  23. 2
      Libraries/rocSPARSE/level_2/bsrsv/README.md
  24. 2
      Libraries/rocSPARSE/level_2/bsrxmv/README.md
  25. 2
      Libraries/rocSPARSE/level_2/csritsv/README.md
  26. 2
      Libraries/rocSPARSE/level_2/csrmv/README.md
  27. 2
      Libraries/rocSPARSE/level_2/csrsv/README.md
  28. 2
      Libraries/rocSPARSE/level_2/gebsrmv/README.md
  29. 2
      Libraries/rocSPARSE/level_3/bsrmm/README.md
  30. 2
      Libraries/rocSPARSE/level_3/bsrsm/README.md
  31. 2
      Libraries/rocSPARSE/level_3/csrmm/README.md
  32. 2
      Libraries/rocSPARSE/level_3/csrsm/README.md
  33. 2
      Libraries/rocSPARSE/level_3/gebsrmm/README.md
  34. 2
      Libraries/rocSPARSE/level_3/gemmi/README.md
  35. 2
      Libraries/rocSPARSE/level_3/sddmm/README.md
  36. 2
      Libraries/rocSPARSE/level_3/spsm/README.md
  37. 2
      Libraries/rocSPARSE/preconditioner/bsric0/README.md
  38. 2
      Libraries/rocSPARSE/preconditioner/bsrilu0/README.md
  39. 2
      Libraries/rocSPARSE/preconditioner/csric0/README.md
  40. 2
      Libraries/rocSPARSE/preconditioner/csrilu0/README.md
  41. 2
      Libraries/rocSPARSE/preconditioner/csritilu0/README.md
  42. 4
      Libraries/rocThrust/README.md
  43. 2
      README.md

79
.gitlab-ci.yml

@ -1,6 +1,6 @@
# MIT License # MIT License
# #
# Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. # Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All rights reserved.
# #
# Permission is hereby granted, free of charge, to any person obtaining a copy # Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal # of this software and associated documentation files (the "Software"), to deal
@ -37,6 +37,13 @@ variables:
# argument unused during compilation: '--rtlib=compiler-rt' # argument unused during compilation: '--rtlib=compiler-rt'
CXX_FLAGS: "-Wno-unused-command-line-argument -Wall -Wextra -Werror" CXX_FLAGS: "-Wno-unused-command-line-argument -Wall -Wextra -Werror"
HIP_FLAGS: "-Wno-unused-command-line-argument -Wall -Wextra -Werror" HIP_FLAGS: "-Wno-unused-command-line-argument -Wall -Wextra -Werror"
# Keep in sync with ROCM_VERSION in Dockerfiles/hip-libraries-cuda-ubuntu.Dockerfile
# and Dockerfiles/hip-libraries-rocm-ubuntu.Dockerfile
DOCKER_ROCM_VERSION: 6.1.0
DOCKER_HIP_LIBRARIES_ROCM_TAG: rocm-ubuntu-${DOCKER_ROCM_VERSION}
DOCKER_HIP_LIBRARIES_CUDA_TAG: cuda-ubuntu-${DOCKER_ROCM_VERSION}
DOCKER_HIP_LIBRARIES_ROCM: $DOCKER_TAG_PREFIX:$DOCKER_HIP_LIBRARIES_ROCM_TAG
DOCKER_HIP_LIBRARIES_CUDA: $DOCKER_TAG_PREFIX:$DOCKER_HIP_LIBRARIES_CUDA_TAG
stages: stages:
- lint - lint
@ -44,7 +51,7 @@ stages:
- test - test
clang-format: clang-format:
image: $DOCKER_TAG_PREFIX:rocm-ubuntu image: $DOCKER_HIP_LIBRARIES_ROCM
stage: lint stage: lint
needs: [] needs: []
tags: tags:
@ -60,7 +67,7 @@ clang-format:
- Scripts/CodeFormat/check_format.sh $CI_MERGE_REQUEST_DIFF_BASE_SHA --binary "$CLANG_FORMAT" - Scripts/CodeFormat/check_format.sh $CI_MERGE_REQUEST_DIFF_BASE_SHA --binary "$CLANG_FORMAT"
copyright-date: copyright-date:
image: $DOCKER_TAG_PREFIX:rocm-ubuntu image: $DOCKER_HIP_LIBRARIES_ROCM
stage: lint stage: lint
needs: [] needs: []
tags: tags:
@ -88,7 +95,7 @@ copyright-date:
- >- - >-
/kaniko/executor /kaniko/executor
--context "${CI_PROJECT_DIR}/Dockerfiles" --context "${CI_PROJECT_DIR}/Dockerfiles"
--dockerfile "${CI_PROJECT_DIR}/Dockerfiles/hip-libraries-${TAG}.Dockerfile" --dockerfile "${CI_PROJECT_DIR}/Dockerfiles/${DOCKERFILE}"
--destination "docker.io/${DOCKER_TAG_PREFIX}:${TAG}" --destination "docker.io/${DOCKER_TAG_PREFIX}:${TAG}"
${NO_PUSH} ${NO_PUSH}
rules: rules:
@ -103,20 +110,22 @@ build:rocm-ubuntu-dockerfile:
extends: extends:
- .build:dockerfiles - .build:dockerfiles
variables: variables:
TAG: rocm-ubuntu DOCKERFILE: hip-libraries-rocm-ubuntu.Dockerfile
TAG: $DOCKER_HIP_LIBRARIES_ROCM_TAG
build:cuda-ubuntu-dockerfile: build:cuda-ubuntu-dockerfile:
extends: extends:
- .build:dockerfiles - .build:dockerfiles
variables: variables:
TAG: cuda-ubuntu DOCKERFILE: hip-libraries-cuda-ubuntu.Dockerfile
TAG: $DOCKER_HIP_LIBRARIES_CUDA_TAG
######################## ########################
# Ubuntu make # # Ubuntu make #
######################## ########################
build:make-rocm: build:make-rocm:
image: $DOCKER_TAG_PREFIX:rocm-ubuntu image: $DOCKER_HIP_LIBRARIES_ROCM
stage: build stage: build
extends: extends:
- .rules:build - .rules:build
@ -127,7 +136,7 @@ build:make-rocm:
- cd $CI_PROJECT_DIR && make CXXFLAGS="$HIP_FLAGS" -j $(nproc) - cd $CI_PROJECT_DIR && make CXXFLAGS="$HIP_FLAGS" -j $(nproc)
build:make-cuda: build:make-cuda:
image: $DOCKER_TAG_PREFIX:cuda-ubuntu image: $DOCKER_HIP_LIBRARIES_CUDA
stage: build stage: build
extends: extends:
- .rules:build - .rules:build
@ -151,7 +160,7 @@ build:make-cuda:
- $CI_PROJECT_DIR/build - $CI_PROJECT_DIR/build
build:cmake-rocm: build:cmake-rocm:
image: $DOCKER_TAG_PREFIX:rocm-ubuntu image: $DOCKER_HIP_LIBRARIES_ROCM
extends: extends:
- .build:cmake - .build:cmake
- .gpus:rocm-gpus - .gpus:rocm-gpus
@ -175,28 +184,28 @@ build:cmake-rocm:
- cmake --install $CI_PROJECT_DIR/build --prefix $CI_PROJECT_DIR/install - cmake --install $CI_PROJECT_DIR/build --prefix $CI_PROJECT_DIR/install
build:cmake-cuda: build:cmake-cuda:
image: $DOCKER_TAG_PREFIX:cuda-ubuntu image: $DOCKER_HIP_LIBRARIES_CUDA
extends: extends:
- .build:cmake - .build:cmake
tags: tags:
- build - build
script: script:
- cmake - cmake
-S $CI_PROJECT_DIR -S $CI_PROJECT_DIR
-B $CI_PROJECT_DIR/build -B $CI_PROJECT_DIR/build
-D GPU_RUNTIME=CUDA -D GPU_RUNTIME=CUDA
-D CMAKE_CXX_FLAGS="$CXX_FLAGS" -D CMAKE_CXX_FLAGS="$CXX_FLAGS"
-D CMAKE_CUDA_FLAGS="$CUDA_FLAGS" -D CMAKE_CUDA_FLAGS="$CUDA_FLAGS"
-D CMAKE_MODULE_PATH=/opt/rocm/lib/cmake/hip -D CMAKE_MODULE_PATH=/opt/rocm/lib/cmake/hip
2>&1 | tee cmake_log.txt 2>&1 | tee cmake_log.txt
# check if all dependencies were found # check if all dependencies were found
- |- - |-
if grep -qi "could not find" cmake_log.txt; then if grep -qi "could not find" cmake_log.txt; then
echo "Some CMake libraries could not be found" echo "Some CMake libraries could not be found"
exit 1 exit 1
fi fi
- cmake --build $CI_PROJECT_DIR/build - cmake --build $CI_PROJECT_DIR/build
- cmake --install $CI_PROJECT_DIR/build --prefix $CI_PROJECT_DIR/install - cmake --install $CI_PROJECT_DIR/build --prefix $CI_PROJECT_DIR/install
######################## ########################
# Ubuntu Tests # # Ubuntu Tests #
@ -207,10 +216,10 @@ build:cmake-cuda:
extends: extends:
- .rules:test - .rules:test
script: script:
- cd $CI_PROJECT_DIR/build && ctest --output-on-failure - cd $CI_PROJECT_DIR/build && ctest --output-on-failure --parallel 8
test:rocm: test:rocm:
image: $DOCKER_TAG_PREFIX:rocm-ubuntu image: $DOCKER_HIP_LIBRARIES_ROCM
extends: extends:
- .test - .test
- .gpus:rocm - .gpus:rocm
@ -218,7 +227,7 @@ test:rocm:
- build:cmake-rocm - build:cmake-rocm
test:cuda: test:cuda:
image: $DOCKER_TAG_PREFIX:cuda-ubuntu image: $DOCKER_HIP_LIBRARIES_CUDA
extends: extends:
- .test - .test
- .gpus:nvcc - .gpus:nvcc
@ -386,7 +395,7 @@ test:windows-nvcc-vs:
# So for now, just add the library path here. # So for now, just add the library path here.
- $env:PATH = "${env:HIP_PATH}\bin;" + $env:PATH - $env:PATH = "${env:HIP_PATH}\bin;" + $env:PATH
- cd "$CI_PROJECT_DIR/build" - cd "$CI_PROJECT_DIR/build"
- ctest --output-on-failure --timeout 15 - ctest --output-on-failure --timeout 15 --parallel 8
- cmake --install "$CI_PROJECT_DIR/build" --prefix "$CI_PROJECT_DIR/install" - cmake --install "$CI_PROJECT_DIR/build" --prefix "$CI_PROJECT_DIR/install"
needs: [] needs: []

3
AI/MIGraphX/Quantization/README.md

@ -1,7 +1,6 @@
# MIGraphX - Torch Examples # MIGraphX - Torch Examples
# Summary ## Summary
The examples in this subdirectory showcase the functionality for executing quantized models using MIGraphX. The Torch-MIGraphX integration library is used to achieve this, where PyTorch is used to quantize models, and MIGraphX is used to execute them on AMD GPUs. The examples in this subdirectory showcase the functionality for executing quantized models using MIGraphX. The Torch-MIGraphX integration library is used to achieve this, where PyTorch is used to quantize models, and MIGraphX is used to execute them on AMD GPUs.

2
AI/MIGraphX/Quantization/Running-Quantized-ResNet50-via-MIGraphX.md

@ -6,7 +6,7 @@ This example walks through the dynamo Post Training Quantization (PTQ) workflow
## Prerequisites ## Prerequisites
- You must follow the installation instructions for the torch_migraphx library in [README.md](README.md) before using this example. - You must follow the installation instructions for the torch_migraphx library in [AI/MIGraphX/Quantization](https://github.com/ROCm/rocm-examples/tree/develop/AI/MIGraphX/Quantization/) before using this example.
## Steps for running a quantized model using torch_migraphx ## Steps for running a quantized model using torch_migraphx

2
Applications/README.md

@ -10,7 +10,7 @@ The examples in this subdirectory showcase several GPU-implementations of financ
- [CMake](https://cmake.org/download/) (at least version 3.21) - [CMake](https://cmake.org/download/) (at least version 3.21)
- OR GNU Make - available via the distribution's package manager - OR GNU Make - available via the distribution's package manager
- [ROCm](https://docs.amd.com/bundle/ROCm-Installation-Guide-v5.1.3/page/Overview_of_ROCm_Installation_Methods.html) (at least version 5.x.x) - [ROCm](https://rocm.docs.amd.com/projects/HIP/en/latest/install/install.html) (at least version 6.x.x)
### Windows ### Windows

114
Dockerfiles/hip-libraries-cuda-ubuntu.Dockerfile

@ -1,5 +1,13 @@
# syntax=docker/dockerfile:latest
# Above is required for substitutions in environment variables
# CUDA based docker image # CUDA based docker image
FROM nvidia/cuda:12.0.0-devel-ubuntu20.04 FROM nvidia/cuda:12.2.0-devel-ubuntu22.04
# The ROCm version that this image is based on.
# Always write this down as major.minor.patch
ENV ROCM_VERSION=6.1.0
ENV ROCM_VERSION_APT=${ROCM_VERSION%.0}
# Base packages that are required for the installation # Base packages that are required for the installation
RUN export DEBIAN_FRONTEND=noninteractive; \ RUN export DEBIAN_FRONTEND=noninteractive; \
@ -19,17 +27,17 @@ RUN export DEBIAN_FRONTEND=noninteractive; \
vulkan-validationlayers \ vulkan-validationlayers \
libglfw3-dev \ libglfw3-dev \
gfortran \ gfortran \
# Nvidia driver version needed for hipSOLVER's CUDA backend.
# See https://docs.nvidia.com/deploy/cuda-compatibility/index.html#default-to-minor-version.
nvidia-driver-455 \
&& rm -rf /var/lib/apt/lists/* && rm -rf /var/lib/apt/lists/*
# Install HIP using the installer script # Install the HIP compiler and libraries from the ROCm repositories
RUN export DEBIAN_FRONTEND=noninteractive; \ RUN export DEBIAN_FRONTEND=noninteractive; \
wget -q -O - https://repo.radeon.com/rocm/rocm.gpg.key | apt-key add - \ mkdir -p /etc/apt/keyrings \
&& echo 'deb [arch=amd64] https://repo.radeon.com/rocm/apt/6.0/ ubuntu main' > /etc/apt/sources.list.d/rocm.list \ && wget -q -O - https://repo.radeon.com/rocm/rocm.gpg.key | gpg --dearmor > /etc/apt/keyrings/rocm.gpg \
&& echo "deb [arch=amd64, signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/$ROCM_VERSION_APT/ jammy main" > /etc/apt/sources.list.d/rocm.list \
&& printf 'Package: *\nPin: origin "repo.radeon.com"\nPin-Priority: 9001\n' > /etc/apt/preferences.d/radeon.pref \
&& apt-get update -qq \ && apt-get update -qq \
&& apt-get install -y hip-base hipify-clang rocm-core hipcc hip-dev && apt-get install -y hip-base hipify-clang rocm-core hipcc hip-dev rocm-llvm-dev \
&& rm -rf /var/lib/apt/lists/*
# Install CMake # Install CMake
RUN wget https://github.com/Kitware/CMake/releases/download/v3.21.7/cmake-3.21.7-linux-x86_64.sh \ RUN wget https://github.com/Kitware/CMake/releases/download/v3.21.7/cmake-3.21.7-linux-x86_64.sh \
@ -39,81 +47,87 @@ RUN wget https://github.com/Kitware/CMake/releases/download/v3.21.7/cmake-3.21.7
ENV PATH="/cmake/bin:/opt/rocm/bin:${PATH}" ENV PATH="/cmake/bin:/opt/rocm/bin:${PATH}"
ENV HIP_COMPILER=nvcc HIP_PLATFORM=nvidia HIP_RUNTIME=cuda
RUN echo "/opt/rocm/lib" >> /etc/ld.so.conf.d/rocm.conf \ RUN echo "/opt/rocm/lib" >> /etc/ld.so.conf.d/rocm.conf \
&& ldconfig && ldconfig
ENV HIP_COMPILER=nvcc HIP_PLATFORM=nvidia HIP_RUNTIME=cuda
# Install rocRAND # Install rocRAND
RUN wget https://github.com/ROCm/rocRAND/archive/refs/tags/rocm-6.0.0.tar.gz \ RUN wget https://github.com/ROCm/rocRAND/archive/refs/tags/rocm-${ROCM_VERSION}.tar.gz -O rocrand.tar.gz \
&& tar -xf ./rocm-6.0.0.tar.gz \ && mkdir rocrand \
&& rm ./rocm-6.0.0.tar.gz \ && tar -xf ./rocrand.tar.gz --strip-components 1 -C rocrand \
&& cmake -S ./rocRAND-rocm-6.0.0 -B ./rocRAND-rocm-6.0.0/build \ && rm ./rocrand.tar.gz \
&& cmake -S ./rocrand -B ./rocrand/build \
-D CMAKE_MODULE_PATH=/opt/rocm/lib/cmake/hip \ -D CMAKE_MODULE_PATH=/opt/rocm/lib/cmake/hip \
-D BUILD_HIPRAND=OFF \ -D BUILD_HIPRAND=OFF \
-D CMAKE_INSTALL_PREFIX=/opt/rocm \ -D CMAKE_INSTALL_PREFIX=/opt/rocm \
-D NVGPU_TARGETS="50" \ -D NVGPU_TARGETS="50" \
&& cmake --build ./rocRAND-rocm-6.0.0/build --target install \ && cmake --build ./rocrand/build --target install \
&& rm -rf ./rocRAND-rocm-6.0.0 && rm -rf ./rocrand
# Install hipCUB # Install hipCUB
RUN wget https://github.com/ROCm/hipCUB/archive/refs/tags/rocm-6.0.0.tar.gz \ RUN wget https://github.com/ROCm/hipCUB/archive/refs/tags/rocm-${ROCM_VERSION}.tar.gz -O hipcub.tar.gz \
&& tar -xf ./rocm-6.0.0.tar.gz \ && mkdir hipcub \
&& rm ./rocm-6.0.0.tar.gz \ && tar -xf ./hipcub.tar.gz --strip-components 1 -C hipcub \
&& cmake -S ./hipCUB-rocm-6.0.0 -B ./hipCUB-rocm-6.0.0/build \ && rm ./hipcub.tar.gz \
&& cmake -S ./hipcub -B ./hipcub/build \
-D CMAKE_MODULE_PATH=/opt/rocm/lib/cmake/hip \ -D CMAKE_MODULE_PATH=/opt/rocm/lib/cmake/hip \
-D CMAKE_INSTALL_PREFIX=/opt/rocm \ -D CMAKE_INSTALL_PREFIX=/opt/rocm \
&& cmake --build ./hipCUB-rocm-6.0.0/build --target install \ && cmake --build ./hipcub/build --target install \
&& rm -rf ./hipCUB-rocm-6.0.0 && rm -rf ./hipcub
# Install hipBLAS # Install hipBLAS
# hipBLAS cmake for rocm-6.0.0 is broken added CXXFLAGS=-D__HIP_PLATFORM_NVIDIA__ as fix RUN wget https://github.com/ROCm/hipBLAS/archive/refs/tags/rocm-${ROCM_VERSION}.tar.gz -O hipblas.tar.gz \
RUN wget https://github.com/ROCm/hipBLAS/archive/refs/tags/rocm-6.0.0.tar.gz \ && mkdir hipblas \
&& tar -xf ./rocm-6.0.0.tar.gz \ && tar -xf ./hipblas.tar.gz --strip-components 1 -C hipblas \
&& rm ./rocm-6.0.0.tar.gz \ && rm ./hipblas.tar.gz \
&& CXXFLAGS=-D__HIP_PLATFORM_NVIDIA__ cmake -S ./hipBLAS-rocm-6.0.0 -B ./hipBLAS-rocm-6.0.0/build \ && CXXFLAGS=-D__HIP_PLATFORM_NVIDIA__ cmake -S ./hipblas -B ./hipblas/build \
-D CMAKE_MODULE_PATH=/opt/rocm/lib/cmake/hip \ -D CMAKE_MODULE_PATH=/opt/rocm/lib/cmake/hip \
-D CMAKE_INSTALL_PREFIX=/opt/rocm \ -D CMAKE_INSTALL_PREFIX=/opt/rocm \
-D USE_CUDA=ON \ -D USE_CUDA=ON \
&& cmake --build ./hipBLAS-rocm-6.0.0/build --target install \ && cmake --build ./hipblas/build --target install \
&& rm -rf ./hipBLAS-rocm-6.0.0 && rm -rf ./hipblas
# Install hipSOLVER # Install hipSOLVER
# hipSOLVER cmake for rocm-6.0.0 is broken added CXXFLAGS=-D__HIP_PLATFORM_NVIDIA__ as fix # hipSOLVER cmake for rocm-6.1.0 is broken added CXXFLAGS=-D__HIP_PLATFORM_NVIDIA__ as fix
RUN wget https://github.com/ROCm/hipSOLVER/archive/refs/tags/rocm-6.0.0.tar.gz \ RUN wget https://github.com/ROCm/hipSOLVER/archive/refs/tags/rocm-${ROCM_VERSION}.tar.gz -O hipsolver.tar.gz \
&& tar -xf ./rocm-6.0.0.tar.gz \ && mkdir hipsolver \
&& rm ./rocm-6.0.0.tar.gz \ && tar -xf ./hipsolver.tar.gz --strip-components 1 -C hipsolver \
&& CXXFLAGS=-D__HIP_PLATFORM_NVIDIA__ cmake -S ./hipSOLVER-rocm-6.0.0 -B ./hipSOLVER-rocm-6.0.0/build \ && rm ./hipsolver.tar.gz \
&& CXXFLAGS=-D__HIP_PLATFORM_NVIDIA__ cmake -S ./hipsolver -B ./hipsolver/build \
-D CMAKE_MODULE_PATH=/opt/rocm/lib/cmake/hip \ -D CMAKE_MODULE_PATH=/opt/rocm/lib/cmake/hip \
-D CMAKE_INSTALL_PREFIX=/opt/rocm \ -D CMAKE_INSTALL_PREFIX=/opt/rocm \
-D USE_CUDA=ON \ -D USE_CUDA=ON \
&& cmake --build ./hipSOLVER-rocm-6.0.0/build --target install \ && cmake --build ./hipsolver/build --target install \
&& rm -rf ./hipSOLVER-rocm-6.0.0 && rm -rf ./hipsolver
# Install hipRAND # Install hipRAND
# Build from commit that removes deprecated macro use # Manually replace usage of __HIP_PLATFORM_NVCC__ with __HIP_PLATFORM_NVIDIA__. See
RUN git clone https://github.com/ROCm/hipRAND.git hipRAND-rocm-6.0.0 \ # https://github.com/ROCm/hipRAND/commit/4925f0da96fad5b9f532ddc79f1f52fc279d329f
&& cd hipRAND-rocm-6.0.0 \ RUN wget https://github.com/ROCm/hipRAND/archive/refs/tags/rocm-${ROCM_VERSION}.tar.gz -O hiprand.tar.gz \
&& git reset --hard 4925f0da96fad5b9f532ddc79f1f52fc279d329f \ && mkdir hiprand \
&& cmake -S . -B ./build \ && tar -xf ./hiprand.tar.gz --strip-components 1 -C hiprand \
&& rm ./hiprand.tar.gz \
&& sed -i s/__HIP_PLATFORM_NVCC__/__HIP_PLATFORM_NVIDIA__/ ./hiprand/library/include/hiprand/hiprand.h \
&& cmake -S ./hiprand -B ./hiprand/build \
-D CMAKE_MODULE_PATH=/opt/rocm/lib/cmake/hip \ -D CMAKE_MODULE_PATH=/opt/rocm/lib/cmake/hip \
-D CMAKE_INSTALL_PREFIX=/opt/rocm \ -D CMAKE_INSTALL_PREFIX=/opt/rocm \
-D BUILD_WITH_LIB=CUDA \ -D BUILD_WITH_LIB=CUDA \
-D NVGPU_TARGETS="50" \ -D NVGPU_TARGETS="50" \
&& cmake --build ./build --target install \ && cmake --build ./hiprand/build --target install \
&& cd .. \ && rm -rf ./hiprand
&& rm -rf ./hipRAND-rocm-6.0.0
# Install hipFFT # Install hipFFT
RUN wget https://github.com/ROCm/hipFFT/archive/refs/tags/rocm-6.0.0.tar.gz \ RUN wget https://github.com/ROCm/hipFFT/archive/refs/tags/rocm-${ROCM_VERSION}.tar.gz -O hipfft.tar.gz \
&& tar -xf ./rocm-6.0.0.tar.gz \ && mkdir hipfft \
&& rm ./rocm-6.0.0.tar.gz \ && tar -xf ./hipfft.tar.gz --strip-components 1 -C hipfft \
&& cmake -S ./hipFFT-rocm-6.0.0 -B ./hipFFT-rocm-6.0.0/build \ && rm ./hipfft.tar.gz \
&& cmake -S ./hipfft -B ./hipfft/build \
-D CMAKE_MODULE_PATH=/opt/rocm/lib/cmake/hip \ -D CMAKE_MODULE_PATH=/opt/rocm/lib/cmake/hip \
-D CMAKE_INSTALL_PREFIX=/opt/rocm \ -D CMAKE_INSTALL_PREFIX=/opt/rocm \
-D BUILD_WITH_LIB=CUDA \ -D BUILD_WITH_LIB=CUDA \
&& cmake --build ./hipFFT-rocm-6.0.0/build --target install \ && cmake --build ./hipfft/build --target install \
&& rm -rf ./hipFFT-rocm-6.0.0 && rm -rf ./hipfft
# Use render group as an argument from user # Use render group as an argument from user
ARG GID=109 ARG GID=109

33
Dockerfiles/hip-libraries-rocm-ubuntu.Dockerfile

@ -1,5 +1,13 @@
# syntax=docker/dockerfile:latest
# Above is required for substitutions in environment variables
# Ubuntu based docker image # Ubuntu based docker image
FROM ubuntu:20.04 FROM ubuntu:22.04
# The ROCm version that this image is based on.
# Always write this down as major.minor.patch
ENV ROCM_VERSION=6.1.0
ENV ROCM_VERSION_APT=${ROCM_VERSION%.0}
# Base packages that are required for the installation # Base packages that are required for the installation
RUN export DEBIAN_FRONTEND=noninteractive; \ RUN export DEBIAN_FRONTEND=noninteractive; \
@ -18,18 +26,29 @@ RUN export DEBIAN_FRONTEND=noninteractive; \
libvulkan-dev \ libvulkan-dev \
vulkan-validationlayers \ vulkan-validationlayers \
libglfw3-dev \ libglfw3-dev \
gnupg \
g++ \
&& rm -rf /var/lib/apt/lists/* && rm -rf /var/lib/apt/lists/*
ENV LANG en_US.utf8 ENV LANG en_US.utf8
# Install ROCM HIP and libraries using the installer script # Install the HIP compiler and libraries from the ROCm repositories
RUN export DEBIAN_FRONTEND=noninteractive; \ RUN export DEBIAN_FRONTEND=noninteractive; \
wget https://repo.radeon.com/amdgpu-install/6.0/ubuntu/focal/amdgpu-install_6.0.60000-1_all.deb \ mkdir -p /etc/apt/keyrings \
&& wget -q -O - https://repo.radeon.com/rocm/rocm.gpg.key | gpg --dearmor > /etc/apt/keyrings/rocm.gpg \
&& echo "deb [arch=amd64, signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/$ROCM_VERSION_APT/ jammy main" > /etc/apt/sources.list.d/rocm.list \
&& printf 'Package: *\nPin: origin "repo.radeon.com"\nPin-Priority: 9001\n' > /etc/apt/preferences.d/radeon.pref \
&& apt-get update -qq \ && apt-get update -qq \
&& apt-get install -y ./amdgpu-install_6.0.60000-1_all.deb \ && apt-get install --no-install-recommends -y \
&& rm ./amdgpu-install_6.0.60000-1_all.deb\ hip-base hipify-clang rocm-core hipcc \
&& amdgpu-install -y --usecase=hiplibsdk --no-dkms \ hip-dev rocm-hip-runtime-dev rocm-llvm-dev \
&& apt-get install -y libnuma-dev \ rocrand-dev hiprand-dev \
rocprim-dev hipcub-dev \
rocblas-dev hipblas-dev \
rocsolver-dev hipsolver-dev \
rocfft-dev hipfft-dev \
rocsparse-dev \
rocthrust-dev \
&& rm -rf /var/lib/apt/lists/* && rm -rf /var/lib/apt/lists/*
# Install CMake # Install CMake

4
External/glad/glad.cpp vendored

@ -1867,8 +1867,8 @@ static void find_coreGL(void)
{ {
/* Thank you @elmindreda /* Thank you @elmindreda
* https://github.com/elmindreda/greg/blob/master/templates/greg.c.in#L176 * https://github.com/elmindreda/greg/blob/master/templates/greg.h.in
* https://github.com/glfw/glfw/blob/master/src/context.c#L36 * https://github.com/glfw/glfw/blob/master/src/context.c
*/ */
int i, major, minor; int i, major, minor;

2
HIP-Basic/README.md

@ -10,7 +10,7 @@ The examples in this subdirectory showcase the functionality of the HIP runtime.
- [CMake](https://cmake.org/download/) (at least version 3.21) - [CMake](https://cmake.org/download/) (at least version 3.21)
- OR GNU Make - available via the distribution's package manager - OR GNU Make - available via the distribution's package manager
- [ROCm](https://docs.amd.com/bundle/ROCm-Installation-Guide-v5.1.3/page/Overview_of_ROCm_Installation_Methods.html) (at least version 5.x.x) - [ROCm](https://rocm.docs.amd.com/projects/HIP/en/latest/install/install.html) (at least version 6.x.x)
### Windows ### Windows

2
HIP-Basic/cooperative_groups/README.md

@ -35,7 +35,7 @@ The partitioned threads can reside across multiple devices.
- `thread_block` - `thread_block`
- `tiled_partition<size>()` - `tiled_partition<size>()`
- `thread_block_tile` - `thread_block_tile`
- All above from the [`cooperative_groups` namespace](https://github.com/ROCm-Developer-Tools/hipamd/blob/develop/include/hip/amd_detail/amd_hip_cooperative_groups.h) - All above from the [`cooperative_groups` namespace](https://github.com/ROCm/clr/blob/develop/hipamd/include/hip/amd_detail/amd_hip_cooperative_groups.h)
#### Host symbols #### Host symbols

4
HIP-Basic/device_query/README.md

@ -16,9 +16,9 @@ This example shows how the target platform and compiler can be identified, as we
## Key APIs and Concepts ## Key APIs and Concepts
- HIP code can target the AMD and the NVIDIA platform, and it can be compiled with different compilers. Compiler-defined macros can be used in HIP code to write code that is specific to a target or a compiler. See [HIP Programming Guide - Distinguishing Compiler Modes](https://docs.amd.com/bundle/HIP-Programming-Guide-v5.2/page/Transitioning_from_CUDA_to_HIP.html#d4438e664) for more details. - HIP code can target the AMD and the NVIDIA platform, and it can be compiled with different compilers. Compiler-defined macros can be used in HIP code to write code that is specific to a target or a compiler. See [HIP Programming Guide - Distinguishing Compiler Modes](https://rocm.docs.amd.com/projects/HIP/en/latest/how-to/hip_porting_guide.html#distinguishing-compiler-modes) for more details.
- `hipGetDeviceCount` returns the number of devices in the system. Some device management API functions take an identifier for each device, which is a monotonically incrementing number starting from zero. Others require the active device to be set, with `hipSetDevice`. A full overview of the device management API can be found at [HIP API - Device Management](https://docs.amd.com/bundle/HIP_API_Guide/page/group___device.html). - `hipGetDeviceCount` returns the number of devices in the system. Some device management API functions take an identifier for each device, which is a monotonically incrementing number starting from zero. Others require the active device to be set, with `hipSetDevice`. A full overview of the device management API can be found at [HIP API - Device Management](https://rocm.docs.amd.com/projects/HIP/en/latest/doxygen/html/group___device.html).
## Demonstrated API Calls ## Demonstrated API Calls

4
HIP-Basic/device_query/main.cpp

@ -1,6 +1,6 @@
// MIT License // MIT License
// //
// Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
// //
// Permission is hereby granted, free of charge, to any person obtaining a copy // Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal // of this software and associated documentation files (the "Software"), to deal
@ -63,7 +63,7 @@ void print_device_properties(int device_id)
HIP_CHECK(hipGetDeviceProperties(&props, device_id)); HIP_CHECK(hipGetDeviceProperties(&props, device_id));
// Print a small set of all available properties. A full list can be found at: // Print a small set of all available properties. A full list can be found at:
// https://docs.amd.com/bundle/HIP_API_Guide/page/structhip_device_prop__t.html // https://rocm.docs.amd.com/projects/HIP/en/latest/doxygen/html/structhip_device_prop__t.html
std::cout << std::setw(col_w) << "Name: " << props.name << '\n'; std::cout << std::setw(col_w) << "Name: " << props.name << '\n';
std::cout << std::setw(col_w) std::cout << std::setw(col_w)
<< "totalGlobalMem: " << double_precision(bytes_to_gib(props.totalGlobalMem), 2, true) << "totalGlobalMem: " << double_precision(bytes_to_gib(props.totalGlobalMem), 2, true)

2
HIP-Basic/multi_gpu_data_transfer/README.md

@ -32,7 +32,7 @@ In this example, the result of a matrix transpose kernel execution on one device
- With `hipMemcpy` data bytes can be transferred from host to device (using `hipMemcpyHostToDevice`), from device to host (using `hipMemcpyDeviceToHost`) or from device to device (using `hipMemcpyDeviceToDevice`). The latter will only work if P2P communication has been enabled from the destination to the source device. - With `hipMemcpy` data bytes can be transferred from host to device (using `hipMemcpyHostToDevice`), from device to host (using `hipMemcpyDeviceToHost`) or from device to device (using `hipMemcpyDeviceToDevice`). The latter will only work if P2P communication has been enabled from the destination to the source device.
- `myKernelName<<<...>>>` queues the execution of a kernel in the current device and `hipDeviceSynchronize` makes the host wait on all active streams on the current device. In this example `hipDeviceSynchronize` is necessary because the second device needs the results obtained from the previous kernel execution on the first device. - `myKernelName<<<...>>>` queues the execution of a kernel in the current device and `hipDeviceSynchronize` makes the host wait on all active streams on the current device. In this example `hipDeviceSynchronize` is necessary because the second device needs the results obtained from the previous kernel execution on the first device.
- `hipDeviceReset` discards the state of the current device and updates it to a fresh one. It also frees all the resources (e.g. streams, events, ...) associated with the current device. - `hipDeviceReset` discards the state of the current device and updates it to a fresh one. It also frees all the resources (e.g. streams, events, ...) associated with the current device.
- It's a [known issue with multi-GPU environments](https://community.amd.com/t5/knowledge-base/iommu-advisory-for-multi-gpu-environments/ta-p/477468) that some multi-GPU environments fail due to limitations of the IOMMU enablement, so it may be needed to explicitly enable/disable the IOMMU using the kernel command-line parameter `iommu=pt/off`. - It's a [known issue with multi-GPU environments](https://community.amd.com/t5/knowledge-base/iommu-advisory-for-amd-instinct/ta-p/484601) that some multi-GPU environments fail due to limitations of the IOMMU enablement, so it may be needed to explicitly enable/disable the IOMMU using the kernel command-line parameter `iommu=pt/off`.
## Demonstrated API Calls ## Demonstrated API Calls

13
HIP-Basic/texture_management/main.hip

@ -1,6 +1,6 @@
// MIT License // MIT License
// //
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All rights reserved.
// //
// Permission is hereby granted, free of charge, to any person obtaining a copy // Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal // of this software and associated documentation files (the "Software"), to deal
@ -47,8 +47,17 @@ __global__ void histogram_kernel(unsigned int* histogram,
float u = x / static_cast<float>(size_x) + .5f; float u = x / static_cast<float>(size_x) + .5f;
float v = y / static_cast<float>(size_y) + .5f; float v = y / static_cast<float>(size_y) + .5f;
// Read the value from the texture. #if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT
// Read the value from the texture, if supported.
unsigned char val = tex2D<unsigned char>(tex_obj, u, v); unsigned char val = tex2D<unsigned char>(tex_obj, u, v);
#else
// Prevent compile errors on HIP architectures that do not support
// texture instructions.
(void)u;
(void)v;
(void)tex_obj;
unsigned char val = 0;
#endif
// Determine the histogram bin and write to global memory. // Determine the histogram bin and write to global memory.
unsigned int bin_range = ceiling_div(256, hist_bin_count); unsigned int bin_range = ceiling_div(256, hist_bin_count);

2
Libraries/hipBLAS/README.md

@ -10,7 +10,7 @@ The examples in this subdirectory showcase the functionality of the [hipBLAS](ht
- [CMake](https://cmake.org/download/) (at least version 3.21) - [CMake](https://cmake.org/download/) (at least version 3.21)
- OR GNU Make - available via the distribution's package manager - OR GNU Make - available via the distribution's package manager
- [ROCm](https://docs.amd.com/bundle/ROCm-Installation-Guide-v5.2/page/Overview_of_ROCm_Installation_Methods.html) (at least version 5.x.x) - [ROCm](https://rocm.docs.amd.com/projects/HIP/en/latest/install/install.html) (at least version 6.x.x)
- [hipBLAS](https://github.com/ROCmSoftwarePlatform/hipBLAS): `hipblas` package available from [repo.radeon.com](https://repo.radeon.com/rocm/). - [hipBLAS](https://github.com/ROCmSoftwarePlatform/hipBLAS): `hipblas` package available from [repo.radeon.com](https://repo.radeon.com/rocm/).
### Windows ### Windows

4
Libraries/hipCUB/README.md

@ -10,9 +10,9 @@ The examples in this subdirectory showcase the functionality of the [hipCUB](htt
- [CMake](https://cmake.org/download/) (at least version 3.21) - [CMake](https://cmake.org/download/) (at least version 3.21)
- OR GNU Make - available via the distribution's package manager - OR GNU Make - available via the distribution's package manager
- [ROCm](https://docs.amd.com/bundle/ROCm-Installation-Guide-v5.1.3/page/Overview_of_ROCm_Installation_Methods.html) (at least version 5.x.x) - [ROCm](https://rocm.docs.amd.com/projects/HIP/en/latest/install/install.html) (at least version 6.x.x)
- [hipCUB](https://github.com/ROCmSoftwarePlatform/hipCUB) - [hipCUB](https://github.com/ROCmSoftwarePlatform/hipCUB)
- ROCm platform: `hipCUB-dev` package available from [repo.radeon.com](https://repo.radeon.com/rocm/). The repository is added during the standard ROCm [install procedure](https://docs.amd.com/bundle/ROCm-Installation-Guide-v5.1.3/page/How_to_Install_ROCm.html). - ROCm platform: `hipCUB-dev` package available from [repo.radeon.com](https://repo.radeon.com/rocm/). The repository is added during the standard ROCm [install procedure](https://rocm.docs.amd.com/projects/HIP/en/latest/install/install.html).
- CUDA platform: Install hipCUB from source: [instructions](https://github.com/ROCmSoftwarePlatform/hipCUB#build-and-install). - CUDA platform: Install hipCUB from source: [instructions](https://github.com/ROCmSoftwarePlatform/hipCUB#build-and-install).
- [CUB](https://github.com/NVIDIA/cub) is a dependency of hipCUB for NVIDIA platforms. CUB is part of the NVIDIA CUDA Toolkit. - [CUB](https://github.com/NVIDIA/cub) is a dependency of hipCUB for NVIDIA platforms. CUB is part of the NVIDIA CUDA Toolkit.

4
Libraries/hipSOLVER/README.md

@ -10,8 +10,8 @@ The examples in this subdirectory showcase the functionality of the [hipSOLVER](
- [CMake](https://cmake.org/download/) (at least version 3.21) - [CMake](https://cmake.org/download/) (at least version 3.21)
- OR GNU Make - available via the distribution's package manager - OR GNU Make - available via the distribution's package manager
- [ROCm](https://docs.amd.com/bundle/ROCm-Installation-Guide-v5.2/page/Overview_of_ROCm_Installation_Methods.html) (at least version 5.x.x) - [ROCm](https://rocm.docs.amd.com/projects/HIP/en/latest/install/install.html) (at least version 6.x.x)
- [hipSOLVER](https://github.com/ROCmSoftwarePlatform/hipSOLVER): `hipsolver` package available from [repo.radeon.com](https://repo.radeon.com/rocm/). The repository is added during the standard ROCm [install procedure](https://docs.amd.com/bundle/ROCm-Installation-Guide-v5.2/page/How_to_Install_ROCm.html). - [hipSOLVER](https://github.com/ROCmSoftwarePlatform/hipSOLVER): `hipsolver` package available from [repo.radeon.com](https://repo.radeon.com/rocm/). The repository is added during the standard ROCm [install procedure](https://rocm.docs.amd.com/projects/HIP/en/latest/install/install.html).
### Windows ### Windows

2
Libraries/hipSOLVER/syevdx/README.md

@ -2,7 +2,7 @@
## Description ## Description
This example illustrates how to solve the standard symmetric-definite eigenvalue problem for a symmetric matrix $A$ using hipSOLVER's [Compatibility API](https://hipsolver.readthedocs.io/en/rocm-5.4.4/compat_index.html). This API offers wrapper functions for the ones existing in hipSOLVER (and their equivalents in [cuSolverDN](https://docs.nvidia.com/cuda/cusolver/index.html#cusolverdn-dense-lapack)) and is intended to be used when porting cuSOLVER applications to hipSOLVER ones. The main advantage of this API is that its functions follow the same method signature format as cuSolverDN's, which makes porting easier. This example illustrates how to solve the standard symmetric-definite eigenvalue problem for a symmetric matrix $A$ using hipSOLVER's [Compatibility API](https://rocm.docs.amd.com/projects/hipSOLVER/en/latest/reference/compat-api/lapacklike.html). This API offers wrapper functions for the ones existing in hipSOLVER (and their equivalents in [cuSolverDN](https://docs.nvidia.com/cuda/cusolver/index.html#cusolverdn-dense-lapack)) and is intended to be used when porting cuSOLVER applications to hipSOLVER ones. The main advantage of this API is that its functions follow the same method signature format as cuSolverDN's, which makes porting easier.
Given an $n \times n$ symmetric matrix $A$, the said problem consists of solving the following equation: Given an $n \times n$ symmetric matrix $A$, the said problem consists of solving the following equation:

4
Libraries/rocBLAS/README.md

@ -10,8 +10,8 @@ The examples in this subdirectory showcase the functionality of the [rocBLAS](ht
- [CMake](https://cmake.org/download/) (at least version 3.21) - [CMake](https://cmake.org/download/) (at least version 3.21)
- OR GNU Make - available via the distribution's package manager - OR GNU Make - available via the distribution's package manager
- [ROCm](https://docs.amd.com/bundle/ROCm-Installation-Guide-v5.2/page/Overview_of_ROCm_Installation_Methods.html) (at least version 5.x.x) - [ROCm](https://rocm.docs.amd.com/projects/HIP/en/latest/install/install.html) (at least version 6.x.x)
- [rocBLAS](https://github.com/ROCmSoftwarePlatform/rocBLAS): `rocblas` package available from [repo.radeon.com](https://repo.radeon.com/rocm/). The repository is added during the standard ROCm [install procedure](https://docs.amd.com/bundle/ROCm-Installation-Guide-v5.2/page/How_to_Install_ROCm.html). - [rocBLAS](https://github.com/ROCmSoftwarePlatform/rocBLAS): `rocblas` package available from [repo.radeon.com](https://repo.radeon.com/rocm/). The repository is added during the standard ROCm [install procedure](https://rocm.docs.amd.com/projects/HIP/en/latest/install/install.html).
### Windows ### Windows

4
Libraries/rocPRIM/README.md

@ -10,9 +10,9 @@ The examples in this subdirectory showcase the functionality of the [rocPRIM](ht
- [CMake](https://cmake.org/download/) (at least version 3.21) - [CMake](https://cmake.org/download/) (at least version 3.21)
- OR GNU Make - available via the distribution's package manager - OR GNU Make - available via the distribution's package manager
- [ROCm](https://docs.amd.com/bundle/ROCm-Installation-Guide-v5.1.3/page/Overview_of_ROCm_Installation_Methods.html) (at least version 5.x.x) - [ROCm](https://rocm.docs.amd.com/projects/HIP/en/latest/install/install.html) (at least version 6.x.x)
- [rocPRIM](https://github.com/ROCmSoftwarePlatform/rocPRIM) - [rocPRIM](https://github.com/ROCmSoftwarePlatform/rocPRIM)
- `rocPRIM-dev` package available from [repo.radeon.com](https://repo.radeon.com/rocm/). The repository is added during the standard ROCm [install procedure](https://docs.amd.com/bundle/ROCm-Installation-Guide-v5.1.3/page/How_to_Install_ROCm.html). - `rocPRIM-dev` package available from [repo.radeon.com](https://repo.radeon.com/rocm/). The repository is added during the standard ROCm [install procedure](https://rocm.docs.amd.com/projects/HIP/en/latest/install/install.html).
### Windows ### Windows

4
Libraries/rocRAND/README.md

@ -10,9 +10,9 @@ The examples in this subdirectory showcase the functionality of the [rocRAND](ht
- [CMake](https://cmake.org/download/) (at least version 3.21) - [CMake](https://cmake.org/download/) (at least version 3.21)
- OR GNU Make - available via the distribution's package manager - OR GNU Make - available via the distribution's package manager
- [ROCm](https://docs.amd.com/bundle/ROCm-Installation-Guide-v5.1.3/page/Overview_of_ROCm_Installation_Methods.html) (at least version 5.x.x) OR the HIP Nvidia runtime (on the CUDA platform) - [ROCm](https://rocm.docs.amd.com/projects/HIP/en/latest/install/install.html) (at least version 6.x.x) OR the HIP Nvidia runtime (on the CUDA platform)
- [rocRAND](https://github.com/rocmSoftwarePlatform/rocRAND) - [rocRAND](https://github.com/rocmSoftwarePlatform/rocRAND)
- ROCm platform: `rocrand-dev` package available from [repo.radeon.com](https://repo.radeon.com/rocm/). The repository is added during the standard ROCm [install procedure](https://docs.amd.com/bundle/ROCm-Installation-Guide-v5.1.3/page/How_to_Install_ROCm.html). - ROCm platform: `rocrand-dev` package available from [repo.radeon.com](https://repo.radeon.com/rocm/). The repository is added during the standard ROCm [install procedure](https://rocm.docs.amd.com/projects/HIP/en/latest/install/install.html).
- CUDA platform: Install rocRAND from source: [instructions](https://github.com/rocmSoftwarePlatform/rocRAND#build-and-install). - CUDA platform: Install rocRAND from source: [instructions](https://github.com/rocmSoftwarePlatform/rocRAND#build-and-install).
### Windows ### Windows

8
Libraries/rocSPARSE/README.md

@ -16,13 +16,13 @@ All rocSPARSE library functions, unless otherwise stated, are non blocking and e
- OR GNU Make - available via the distribution's package manager - OR GNU Make - available via the distribution's package manager
- [ROCm](https://docs.amd.com/bundle/ROCm-Installation-Guide-v5.1.3/page/Overview_of_ROCm_Installation_Methods.html) (at least version 5.x.x) OR the HIP Nvidia runtime (on the CUDA platform) - [ROCm](https://rocm.docs.amd.com/projects/HIP/en/latest/install/install.html) (at least version 6.x.x) OR the HIP Nvidia runtime (on the CUDA platform)
- [rocSPARSE](https://github.com/rocmSoftwarePlatform/rocSPARSE) - [rocSPARSE](https://github.com/rocmSoftwarePlatform/rocSPARSE)
- ROCm platform: `rocsparse` package available from [repo.radeon.com](https://repo.radeon.com/rocm/). The repository is added during the standard ROCm [install procedure](https://docs.amd.com/bundle/ROCm-Installation-Guide-v5.1.3/page/How_to_Install_ROCm.html). - ROCm platform: `rocsparse` package available from [repo.radeon.com](https://repo.radeon.com/rocm/). The repository is added during the standard ROCm [install procedure](https://rocm.docs.amd.com/projects/HIP/en/latest/install/install.html).
- CUDA platform: Install rocSPARSE from source: [instructions](https://rocsparse.readthedocs.io/en/rocm-5.5.0/usermanual.html#building-rocsparse-from-source). - CUDA platform: Install rocSPARSE from source: [instructions](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/install/Linux_Install_Guide.html).
### Windows ### Windows
@ -35,7 +35,7 @@ All rocSPARSE library functions, unless otherwise stated, are non blocking and e
- [rocSPARSE](https://github.com/rocmSoftwarePlatform/rocSPARSE) - [rocSPARSE](https://github.com/rocmSoftwarePlatform/rocSPARSE)
- ROCm platform: Installed as part of the ROCm SDK on Windows. - ROCm platform: Installed as part of the ROCm SDK on Windows.
- CUDA platform: Install rocSPARSE from source: [instructions](https://rocsparse.readthedocs.io/en/rocm-5.5.0/usermanual.html#building-rocsparse-from-source). - CUDA platform: Install rocSPARSE from source: [instructions](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/install/Linux_Install_Guide.html).
- [CMake](https://cmake.org/download/) (optional, to build with CMake. Requires at least version 3.21) - [CMake](https://cmake.org/download/) (optional, to build with CMake. Requires at least version 3.21)

2
Libraries/rocSPARSE/level_2/bsrmv/README.md

@ -29,7 +29,7 @@ where
### BSR Matrix Storage Format ### BSR Matrix Storage Format
The [Block Compressed Sparse Row (BSR) storage format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#bsr-storage-format) describes a sparse matrix using three arrays. The idea behind this storage format is to split the given sparse matrix into equal sized blocks of dimension `bsr_dim` and store those using the [CSR format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#csr-storage-format). Because the CSR format only stores non-zero elements, the BSR format introduces the concept of __non-zero block__: a block that contains at least one non-zero element. Note that all elements of non-zero blocks are stored, even if some of them are equal to zero. The [Block Compressed Sparse Row (BSR) storage format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#bsr-storage-format) describes a sparse matrix using three arrays. The idea behind this storage format is to split the given sparse matrix into equal sized blocks of dimension `bsr_dim` and store those using the [CSR format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#csr-storage-format). Because the CSR format only stores non-zero elements, the BSR format introduces the concept of __non-zero block__: a block that contains at least one non-zero element. Note that all elements of non-zero blocks are stored, even if some of them are equal to zero.
Therefore, defining Therefore, defining

2
Libraries/rocSPARSE/level_2/bsrsv/README.md

@ -39,7 +39,7 @@ Obtaining the solution for such a system consists of finding concrete values of
### BSR Matrix Storage Format ### BSR Matrix Storage Format
The [Block Compressed Sparse Row (BSR) storage format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#bsr-storage-format) describes a sparse matrix using three arrays. The idea behind this storage format is to split the given sparse matrix into equal sized blocks of dimension `bsr_dim` and store those using the [CSR format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#csr-storage-format). Because the CSR format only stores non-zero elements, the BSR format introduces the concept of __non-zero block__: a block that contains at least one non-zero element. Note that all elements of non-zero blocks are stored, even if some of them are equal to zero. The [Block Compressed Sparse Row (BSR) storage format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#bsr-storage-format) describes a sparse matrix using three arrays. The idea behind this storage format is to split the given sparse matrix into equal sized blocks of dimension `bsr_dim` and store those using the [CSR format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#csr-storage-format). Because the CSR format only stores non-zero elements, the BSR format introduces the concept of __non-zero block__: a block that contains at least one non-zero element. Note that all elements of non-zero blocks are stored, even if some of them are equal to zero.
Therefore, defining Therefore, defining

2
Libraries/rocSPARSE/level_2/bsrxmv/README.md

@ -32,7 +32,7 @@ otherwise it returns the identical $\mathbf{y}$ vector elements.
### BSR Matrix Storage Format ### BSR Matrix Storage Format
The [Block Compressed Sparse Row (BSR) storage format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#bsr-storage-format) describes a sparse matrix using three arrays. The idea behind this storage format is to split the given sparse matrix into equal sized blocks of dimension `bsr_dim` and store those using the [CSR format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#csr-storage-format). Because the CSR format only stores non-zero elements, the BSR format introduces the concept of __non-zero block__: a block that contains at least one non-zero element. Note that all elements of non-zero blocks are stored, even if some of them are equal to zero. The [Block Compressed Sparse Row (BSR) storage format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#bsr-storage-format) describes a sparse matrix using three arrays. The idea behind this storage format is to split the given sparse matrix into equal sized blocks of dimension `bsr_dim` and store those using the [CSR format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#csr-storage-format). Because the CSR format only stores non-zero elements, the BSR format introduces the concept of __non-zero block__: a block that contains at least one non-zero element. Note that all elements of non-zero blocks are stored, even if some of them are equal to zero.
Therefore, defining Therefore, defining

2
Libraries/rocSPARSE/level_2/csritsv/README.md

@ -40,7 +40,7 @@ Obtaining solution for such a system consists on finding concrete values of all
### CSR Matrix Storage Format ### CSR Matrix Storage Format
The [Compressed Sparse Row (CSR) storage format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#csr-storage-format) describes an $m \times n$ sparse matrix with three arrays. The [Compressed Sparse Row (CSR) storage format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#csr-storage-format) describes an $m \times n$ sparse matrix with three arrays.
Defining Defining

2
Libraries/rocSPARSE/level_2/csrmv/README.md

@ -29,7 +29,7 @@ where
### CSR Matrix Storage Format ### CSR Matrix Storage Format
The [Compressed Sparse Row (CSR) storage format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#csr-storage-format) describes an $m \times n$ sparse matrix with three arrays. The [Compressed Sparse Row (CSR) storage format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#csr-storage-format) describes an $m \times n$ sparse matrix with three arrays.
Defining Defining

2
Libraries/rocSPARSE/level_2/csrsv/README.md

@ -39,7 +39,7 @@ Obtaining solution for such a system consists on finding concrete values of all
### CSR Matrix Storage Format ### CSR Matrix Storage Format
The [Compressed Sparse Row (CSR) storage format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#csr-storage-format) describes an $m \times n$ sparse matrix with three arrays. The [Compressed Sparse Row (CSR) storage format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#csr-storage-format) describes an $m \times n$ sparse matrix with three arrays.
Defining Defining

2
Libraries/rocSPARSE/level_2/gebsrmv/README.md

@ -33,7 +33,7 @@ where
### GEBSR Matrix Storage Format ### GEBSR Matrix Storage Format
The [General Block Compressed Sparse Row (GEBSR) storage format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#gebsr-storage-format) describes a sparse matrix using three arrays. The idea behind this storage format is the same as for the BSR format, but the blocks in which the sparse matrix is split are not squared. All of them are of `bsr_row_dim` $\times$ `bsr_col_dim` size. The [General Block Compressed Sparse Row (GEBSR) storage format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#gebsr-storage-format) describes a sparse matrix using three arrays. The idea behind this storage format is the same as for the BSR format, but the blocks in which the sparse matrix is split are not squared. All of them are of `bsr_row_dim` $\times$ `bsr_col_dim` size.
Therefore, defining Therefore, defining

2
Libraries/rocSPARSE/level_3/bsrmm/README.md

@ -30,7 +30,7 @@ where
### BSR Matrix Storage Format ### BSR Matrix Storage Format
The [Block Compressed Sparse Row (BSR) storage format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#bsr-storage-format) describes a sparse matrix using three arrays. The idea behind this storage format is to split the given sparse matrix into equal sized blocks of dimension `bsr_dim` and store those using the [CSR format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#csr-storage-format). Because the CSR format only stores non-zero elements, the BSR format introduces the concept of __non-zero block__: a block that contains at least one non-zero element. Note that all elements of non-zero blocks are stored, even if some of them are equal to zero. The [Block Compressed Sparse Row (BSR) storage format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#bsr-storage-format) describes a sparse matrix using three arrays. The idea behind this storage format is to split the given sparse matrix into equal sized blocks of dimension `bsr_dim` and store those using the [CSR format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#csr-storage-format). Because the CSR format only stores non-zero elements, the BSR format introduces the concept of __non-zero block__: a block that contains at least one non-zero element. Note that all elements of non-zero blocks are stored, even if some of them are equal to zero.
Therefore, defining Therefore, defining

2
Libraries/rocSPARSE/level_3/bsrsm/README.md

@ -42,7 +42,7 @@ This is the same as solving the classical system of linear equations $op_a(A) x_
### BSR Matrix Storage Format ### BSR Matrix Storage Format
The [Block Compressed Sparse Row (BSR) storage format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#bsr-storage-format) describes a sparse matrix using three arrays. The idea behind this storage format is to split the given sparse matrix into equal sized blocks of dimension `bsr_dim` and store those using the [CSR format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#csr-storage-format). Because the CSR format only stores non-zero elements, the BSR format introduces the concept of __non-zero block__: a block that contains at least one non-zero element. Note that all elements of non-zero blocks are stored, even if some of them are equal to zero. The [Block Compressed Sparse Row (BSR) storage format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#bsr-storage-format) describes a sparse matrix using three arrays. The idea behind this storage format is to split the given sparse matrix into equal sized blocks of dimension `bsr_dim` and store those using the [CSR format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#csr-storage-format). Because the CSR format only stores non-zero elements, the BSR format introduces the concept of __non-zero block__: a block that contains at least one non-zero element. Note that all elements of non-zero blocks are stored, even if some of them are equal to zero.
Therefore, defining Therefore, defining

2
Libraries/rocSPARSE/level_3/csrmm/README.md

@ -30,7 +30,7 @@ where
### CSR Matrix Storage Format ### CSR Matrix Storage Format
The [Compressed Sparse Row (CSR) storage format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#csr-storage-format) describes an $m \times n$ sparse matrix with three arrays. The [Compressed Sparse Row (CSR) storage format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#csr-storage-format) describes an $m \times n$ sparse matrix with three arrays.
Defining Defining

2
Libraries/rocSPARSE/level_3/csrsm/README.md

@ -43,7 +43,7 @@ This is the same as solving the classical system of linear equations $op_a(A) x_
### CSR Matrix Storage Format ### CSR Matrix Storage Format
The [Compressed Sparse Row (CSR) storage format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#csr-storage-format) describes an $m \times n$ sparse matrix with three arrays. The [Compressed Sparse Row (CSR) storage format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#csr-storage-format) describes an $m \times n$ sparse matrix with three arrays.
Defining Defining

2
Libraries/rocSPARSE/level_3/gebsrmm/README.md

@ -30,7 +30,7 @@ where
### GEBSR Matrix Storage Format ### GEBSR Matrix Storage Format
The [General Block Compressed Sparse Row (GEBSR) storage format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#gebsr-storage-format) describes a sparse matrix using three arrays. The idea behind this storage format is the same as for the BSR format, but the blocks in which the sparse matrix is split are not squared. All of them are of `bsr_row_dim` $\times$ `bsr_col_dim` size. The [General Block Compressed Sparse Row (GEBSR) storage format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#gebsr-storage-format) describes a sparse matrix using three arrays. The idea behind this storage format is the same as for the BSR format, but the blocks in which the sparse matrix is split are not squared. All of them are of `bsr_row_dim` $\times$ `bsr_col_dim` size.
Therefore, defining Therefore, defining

2
Libraries/rocSPARSE/level_3/gemmi/README.md

@ -36,7 +36,7 @@ where
### CSR Matrix Storage Format ### CSR Matrix Storage Format
The [Compressed Sparse Row (CSR) storage format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#csr-storage-format) describes an $m \times n$ sparse matrix with three arrays. The [Compressed Sparse Row (CSR) storage format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#csr-storage-format) describes an $m \times n$ sparse matrix with three arrays.
Defining Defining

2
Libraries/rocSPARSE/level_3/sddmm/README.md

@ -35,7 +35,7 @@ where
### CSR Matrix Storage Format ### CSR Matrix Storage Format
The [Compressed Sparse Row (CSR) storage format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#csr-storage-format) describes an $m \times n$ sparse matrix with three arrays. The [Compressed Sparse Row (CSR) storage format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#csr-storage-format) describes an $m \times n$ sparse matrix with three arrays.
Defining Defining

2
Libraries/rocSPARSE/level_3/spsm/README.md

@ -35,7 +35,7 @@ where
### CSR Matrix Storage Format ### CSR Matrix Storage Format
The [Compressed Sparse Row (CSR) storage format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#csr-storage-format) describes an $m \times n$ sparse matrix with three arrays. The [Compressed Sparse Row (CSR) storage format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#csr-storage-format) describes an $m \times n$ sparse matrix with three arrays.
Defining Defining

2
Libraries/rocSPARSE/preconditioner/bsric0/README.md

@ -29,7 +29,7 @@ $$A \approx L \cdot L^H.$$
### BSR Matrix Storage Format ### BSR Matrix Storage Format
The [Block Compressed Sparse Row (BSR) storage format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#bsr-storage-format) describes a sparse matrix using three arrays. The idea behind this storage format is to split the given sparse matrix into equal sized blocks of dimension `bsr_dim` and store those using the [CSR format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#csr-storage-format). Because the CSR format only stores non-zero elements, the BSR format introduces the concept of __non-zero block__: a block that contains at least one non-zero element. Note that all elements of non-zero blocks are stored, even if some of them are equal to zero. The [Block Compressed Sparse Row (BSR) storage format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#bsr-storage-format) describes a sparse matrix using three arrays. The idea behind this storage format is to split the given sparse matrix into equal sized blocks of dimension `bsr_dim` and store those using the [CSR format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#csr-storage-format). Because the CSR format only stores non-zero elements, the BSR format introduces the concept of __non-zero block__: a block that contains at least one non-zero element. Note that all elements of non-zero blocks are stored, even if some of them are equal to zero.
Therefore, defining Therefore, defining

2
Libraries/rocSPARSE/preconditioner/bsrilu0/README.md

@ -27,7 +27,7 @@ $$A \approx L \cdot U.$$
### BSR Matrix Storage Format ### BSR Matrix Storage Format
The [Block Compressed Sparse Row (BSR) storage format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#bsr-storage-format) describes a sparse matrix using three arrays. The idea behind this storage format is to split the given sparse matrix into equal sized blocks of dimension `bsr_dim` and store those using the [CSR format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#csr-storage-format). Because the CSR format only stores non-zero elements, the BSR format introduces the concept of __non-zero block__: a block that contains at least one non-zero element. Note that all elements of non-zero blocks are stored, even if some of them are equal to zero. The [Block Compressed Sparse Row (BSR) storage format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#bsr-storage-format) describes a sparse matrix using three arrays. The idea behind this storage format is to split the given sparse matrix into equal sized blocks of dimension `bsr_dim` and store those using the [CSR format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#csr-storage-format). Because the CSR format only stores non-zero elements, the BSR format introduces the concept of __non-zero block__: a block that contains at least one non-zero element. Note that all elements of non-zero blocks are stored, even if some of them are equal to zero.
Therefore, defining Therefore, defining

2
Libraries/rocSPARSE/preconditioner/csric0/README.md

@ -29,7 +29,7 @@ $$A \approx L \cdot L^H.$$
### CSR Matrix Storage Format ### CSR Matrix Storage Format
The [Compressed Sparse Row (CSR) storage format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#csr-storage-format) describes an $m \times n$ sparse matrix with three arrays. The [Compressed Sparse Row (CSR) storage format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#csr-storage-format) describes an $m \times n$ sparse matrix with three arrays.
Defining Defining

2
Libraries/rocSPARSE/preconditioner/csrilu0/README.md

@ -27,7 +27,7 @@ $$A \approx L \cdot U.$$
### CSR Matrix Storage Format ### CSR Matrix Storage Format
The [Compressed Sparse Row (CSR) storage format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#csr-storage-format) describes an $m \times n$ sparse matrix with three arrays. The [Compressed Sparse Row (CSR) storage format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#csr-storage-format) describes an $m \times n$ sparse matrix with three arrays.
Defining Defining

2
Libraries/rocSPARSE/preconditioner/csritilu0/README.md

@ -28,7 +28,7 @@ $$A \approx L \cdot U.$$
### CSR Matrix Storage Format ### CSR Matrix Storage Format
The [Compressed Sparse Row (CSR) storage format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#csr-storage-format) describes an $m \times n$ sparse matrix with three arrays. The [Compressed Sparse Row (CSR) storage format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#csr-storage-format) describes an $m \times n$ sparse matrix with three arrays.
Defining Defining

4
Libraries/rocThrust/README.md

@@ -10,8 +10,8 @@ The examples in this subdirectory showcase the functionality of the [rocThrust](
- [CMake](https://cmake.org/download/) (at least version 3.21) - [CMake](https://cmake.org/download/) (at least version 3.21)
- OR GNU Make - available via the distribution's package manager - OR GNU Make - available via the distribution's package manager
- [ROCm](https://docs.amd.com/bundle/ROCm-Installation-Guide-v5.2/page/Overview_of_ROCm_Installation_Methods.html) (at least version 5.x.x) - [ROCm](https://rocm.docs.amd.com/projects/HIP/en/latest/install/install.html) (at least version 6.x.x)
- [rocThrust](https://github.com/rocmSoftwarePlatform/rocThrust): `rocthrust-dev` package available from [repo.radeon.com](https://repo.radeon.com/rocm/). The repository is added during the standard ROCm [install procedure](https://docs.amd.com/bundle/ROCm-Installation-Guide-v5.2/page/How_to_Install_ROCm.html). - [rocThrust](https://github.com/rocmSoftwarePlatform/rocThrust): `rocthrust-dev` package available from [repo.radeon.com](https://repo.radeon.com/rocm/). The repository is added during the standard ROCm [install procedure](https://rocm.docs.amd.com/projects/HIP/en/latest/install/install.html).
### Windows ### Windows

2
README.md

@@ -219,7 +219,7 @@ A collection of examples to enable new users to start using ROCm. Advanced users
- [CMake](https://cmake.org/download/) (at least version 3.21) - [CMake](https://cmake.org/download/) (at least version 3.21)
- A number of examples also support building via GNU Make - available through the distribution's package manager - A number of examples also support building via GNU Make - available through the distribution's package manager
- [ROCm](https://docs.amd.com/bundle/ROCm-Installation-Guide-v5.1.3/page/Overview_of_ROCm_Installation_Methods.html) (at least version 5.x.x) - [ROCm](https://rocm.docs.amd.com/projects/HIP/en/latest/install/install.html) (at least version 6.x.x)
- For example-specific prerequisites, see the example subdirectories. - For example-specific prerequisites, see the example subdirectories.
### Windows ### Windows

Loading…
Cancel
Save