Browse Source

Develop Stream: update to ROCm 6.1 (#138)

* Resolve "Update to ROCm 6.1"

* Fixed broken URLs

---------

Co-authored-by: Robin Voetter <robin@streamhpc.com>
pull/142/head
Beatriz Navidad Vilches 1 year ago committed by GitHub
parent
commit
f293afdd7f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 79
      .gitlab-ci.yml
  2. 3
      AI/MIGraphX/Quantization/README.md
  3. 2
      AI/MIGraphX/Quantization/Running-Quantized-ResNet50-via-MIGraphX.md
  4. 2
      Applications/README.md
  5. 114
      Dockerfiles/hip-libraries-cuda-ubuntu.Dockerfile
  6. 33
      Dockerfiles/hip-libraries-rocm-ubuntu.Dockerfile
  7. 4
      External/glad/glad.cpp
  8. 2
      HIP-Basic/README.md
  9. 2
      HIP-Basic/cooperative_groups/README.md
  10. 4
      HIP-Basic/device_query/README.md
  11. 4
      HIP-Basic/device_query/main.cpp
  12. 2
      HIP-Basic/multi_gpu_data_transfer/README.md
  13. 13
      HIP-Basic/texture_management/main.hip
  14. 2
      Libraries/hipBLAS/README.md
  15. 4
      Libraries/hipCUB/README.md
  16. 4
      Libraries/hipSOLVER/README.md
  17. 2
      Libraries/hipSOLVER/syevdx/README.md
  18. 4
      Libraries/rocBLAS/README.md
  19. 4
      Libraries/rocPRIM/README.md
  20. 4
      Libraries/rocRAND/README.md
  21. 8
      Libraries/rocSPARSE/README.md
  22. 2
      Libraries/rocSPARSE/level_2/bsrmv/README.md
  23. 2
      Libraries/rocSPARSE/level_2/bsrsv/README.md
  24. 2
      Libraries/rocSPARSE/level_2/bsrxmv/README.md
  25. 2
      Libraries/rocSPARSE/level_2/csritsv/README.md
  26. 2
      Libraries/rocSPARSE/level_2/csrmv/README.md
  27. 2
      Libraries/rocSPARSE/level_2/csrsv/README.md
  28. 2
      Libraries/rocSPARSE/level_2/gebsrmv/README.md
  29. 2
      Libraries/rocSPARSE/level_3/bsrmm/README.md
  30. 2
      Libraries/rocSPARSE/level_3/bsrsm/README.md
  31. 2
      Libraries/rocSPARSE/level_3/csrmm/README.md
  32. 2
      Libraries/rocSPARSE/level_3/csrsm/README.md
  33. 2
      Libraries/rocSPARSE/level_3/gebsrmm/README.md
  34. 2
      Libraries/rocSPARSE/level_3/gemmi/README.md
  35. 2
      Libraries/rocSPARSE/level_3/sddmm/README.md
  36. 2
      Libraries/rocSPARSE/level_3/spsm/README.md
  37. 2
      Libraries/rocSPARSE/preconditioner/bsric0/README.md
  38. 2
      Libraries/rocSPARSE/preconditioner/bsrilu0/README.md
  39. 2
      Libraries/rocSPARSE/preconditioner/csric0/README.md
  40. 2
      Libraries/rocSPARSE/preconditioner/csrilu0/README.md
  41. 2
      Libraries/rocSPARSE/preconditioner/csritilu0/README.md
  42. 4
      Libraries/rocThrust/README.md
  43. 2
      README.md

79
.gitlab-ci.yml

@ -1,6 +1,6 @@ @@ -1,6 +1,6 @@
# MIT License
#
# Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
# Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@ -37,6 +37,13 @@ variables: @@ -37,6 +37,13 @@ variables:
# argument unused during compilation: '--rtlib=compiler-rt'
CXX_FLAGS: "-Wno-unused-command-line-argument -Wall -Wextra -Werror"
HIP_FLAGS: "-Wno-unused-command-line-argument -Wall -Wextra -Werror"
# Keep in sync with ROCM_VERSION in Dockerfiles/hip-libraries-cuda-ubuntu.Dockerfile
# and Dockerfiles/hip-libraries-rocm-ubuntu.Dockerfile
DOCKER_ROCM_VERSION: 6.1.0
DOCKER_HIP_LIBRARIES_ROCM_TAG: rocm-ubuntu-${DOCKER_ROCM_VERSION}
DOCKER_HIP_LIBRARIES_CUDA_TAG: cuda-ubuntu-${DOCKER_ROCM_VERSION}
DOCKER_HIP_LIBRARIES_ROCM: $DOCKER_TAG_PREFIX:$DOCKER_HIP_LIBRARIES_ROCM_TAG
DOCKER_HIP_LIBRARIES_CUDA: $DOCKER_TAG_PREFIX:$DOCKER_HIP_LIBRARIES_CUDA_TAG
stages:
- lint
@ -44,7 +51,7 @@ stages: @@ -44,7 +51,7 @@ stages:
- test
clang-format:
image: $DOCKER_TAG_PREFIX:rocm-ubuntu
image: $DOCKER_HIP_LIBRARIES_ROCM
stage: lint
needs: []
tags:
@ -60,7 +67,7 @@ clang-format: @@ -60,7 +67,7 @@ clang-format:
- Scripts/CodeFormat/check_format.sh $CI_MERGE_REQUEST_DIFF_BASE_SHA --binary "$CLANG_FORMAT"
copyright-date:
image: $DOCKER_TAG_PREFIX:rocm-ubuntu
image: $DOCKER_HIP_LIBRARIES_ROCM
stage: lint
needs: []
tags:
@ -88,7 +95,7 @@ copyright-date: @@ -88,7 +95,7 @@ copyright-date:
- >-
/kaniko/executor
--context "${CI_PROJECT_DIR}/Dockerfiles"
--dockerfile "${CI_PROJECT_DIR}/Dockerfiles/hip-libraries-${TAG}.Dockerfile"
--dockerfile "${CI_PROJECT_DIR}/Dockerfiles/${DOCKERFILE}"
--destination "docker.io/${DOCKER_TAG_PREFIX}:${TAG}"
${NO_PUSH}
rules:
@ -103,20 +110,22 @@ build:rocm-ubuntu-dockerfile: @@ -103,20 +110,22 @@ build:rocm-ubuntu-dockerfile:
extends:
- .build:dockerfiles
variables:
TAG: rocm-ubuntu
DOCKERFILE: hip-libraries-rocm-ubuntu.Dockerfile
TAG: $DOCKER_HIP_LIBRARIES_ROCM_TAG
build:cuda-ubuntu-dockerfile:
extends:
- .build:dockerfiles
variables:
TAG: cuda-ubuntu
DOCKERFILE: hip-libraries-cuda-ubuntu.Dockerfile
TAG: $DOCKER_HIP_LIBRARIES_CUDA_TAG
########################
# Ubuntu make #
########################
build:make-rocm:
image: $DOCKER_TAG_PREFIX:rocm-ubuntu
image: $DOCKER_HIP_LIBRARIES_ROCM
stage: build
extends:
- .rules:build
@ -127,7 +136,7 @@ build:make-rocm: @@ -127,7 +136,7 @@ build:make-rocm:
- cd $CI_PROJECT_DIR && make CXXFLAGS="$HIP_FLAGS" -j $(nproc)
build:make-cuda:
image: $DOCKER_TAG_PREFIX:cuda-ubuntu
image: $DOCKER_HIP_LIBRARIES_CUDA
stage: build
extends:
- .rules:build
@ -151,7 +160,7 @@ build:make-cuda: @@ -151,7 +160,7 @@ build:make-cuda:
- $CI_PROJECT_DIR/build
build:cmake-rocm:
image: $DOCKER_TAG_PREFIX:rocm-ubuntu
image: $DOCKER_HIP_LIBRARIES_ROCM
extends:
- .build:cmake
- .gpus:rocm-gpus
@ -175,28 +184,28 @@ build:cmake-rocm: @@ -175,28 +184,28 @@ build:cmake-rocm:
- cmake --install $CI_PROJECT_DIR/build --prefix $CI_PROJECT_DIR/install
build:cmake-cuda:
image: $DOCKER_TAG_PREFIX:cuda-ubuntu
extends:
- .build:cmake
tags:
- build
script:
- cmake
-S $CI_PROJECT_DIR
-B $CI_PROJECT_DIR/build
-D GPU_RUNTIME=CUDA
-D CMAKE_CXX_FLAGS="$CXX_FLAGS"
-D CMAKE_CUDA_FLAGS="$CUDA_FLAGS"
-D CMAKE_MODULE_PATH=/opt/rocm/lib/cmake/hip
2>&1 | tee cmake_log.txt
# check if all dependencies were found
- |-
if grep -qi "could not find" cmake_log.txt; then
echo "Some CMake libraries could not be found"
exit 1
fi
- cmake --build $CI_PROJECT_DIR/build
- cmake --install $CI_PROJECT_DIR/build --prefix $CI_PROJECT_DIR/install
image: $DOCKER_HIP_LIBRARIES_CUDA
extends:
- .build:cmake
tags:
- build
script:
- cmake
-S $CI_PROJECT_DIR
-B $CI_PROJECT_DIR/build
-D GPU_RUNTIME=CUDA
-D CMAKE_CXX_FLAGS="$CXX_FLAGS"
-D CMAKE_CUDA_FLAGS="$CUDA_FLAGS"
-D CMAKE_MODULE_PATH=/opt/rocm/lib/cmake/hip
2>&1 | tee cmake_log.txt
# check if all dependencies were found
- |-
if grep -qi "could not find" cmake_log.txt; then
echo "Some CMake libraries could not be found"
exit 1
fi
- cmake --build $CI_PROJECT_DIR/build
- cmake --install $CI_PROJECT_DIR/build --prefix $CI_PROJECT_DIR/install
########################
# Ubuntu Tests #
@ -207,10 +216,10 @@ build:cmake-cuda: @@ -207,10 +216,10 @@ build:cmake-cuda:
extends:
- .rules:test
script:
- cd $CI_PROJECT_DIR/build && ctest --output-on-failure
- cd $CI_PROJECT_DIR/build && ctest --output-on-failure --parallel 8
test:rocm:
image: $DOCKER_TAG_PREFIX:rocm-ubuntu
image: $DOCKER_HIP_LIBRARIES_ROCM
extends:
- .test
- .gpus:rocm
@ -218,7 +227,7 @@ test:rocm: @@ -218,7 +227,7 @@ test:rocm:
- build:cmake-rocm
test:cuda:
image: $DOCKER_TAG_PREFIX:cuda-ubuntu
image: $DOCKER_HIP_LIBRARIES_CUDA
extends:
- .test
- .gpus:nvcc
@ -386,7 +395,7 @@ test:windows-nvcc-vs: @@ -386,7 +395,7 @@ test:windows-nvcc-vs:
# So for now, just add the library path here.
- $env:PATH = "${env:HIP_PATH}\bin;" + $env:PATH
- cd "$CI_PROJECT_DIR/build"
- ctest --output-on-failure --timeout 15
- ctest --output-on-failure --timeout 15 --parallel 8
- cmake --install "$CI_PROJECT_DIR/build" --prefix "$CI_PROJECT_DIR/install"
needs: []

3
AI/MIGraphX/Quantization/README.md

@ -1,7 +1,6 @@ @@ -1,7 +1,6 @@
# MIGraphX - Torch Examples
# Summary
## Summary
The examples in this subdirectory showcase the functionality for executing quantized models using MIGraphX. The Torch-MIGraphX integration library is used to achieve this, where PyTorch is used to quantize models, and MIGraphX is used to execute them on AMD GPUs.

2
AI/MIGraphX/Quantization/Running-Quantized-ResNet50-via-MIGraphX.md

@ -6,7 +6,7 @@ This example walks through the dynamo Post Training Quantization (PTQ) workflow @@ -6,7 +6,7 @@ This example walks through the dynamo Post Training Quantization (PTQ) workflow
## Prerequisites
- You must follow the installation instructions for the torch_migraphx library in [README.md](README.md) before using this example.
- You must follow the installation instructions for the torch_migraphx library in [AI/MIGraphX/Quantization](https://github.com/ROCm/rocm-examples/tree/develop/AI/MIGraphX/Quantization/) before using this example.
## Steps for running a quantized model using torch_migraphx

2
Applications/README.md

@ -10,7 +10,7 @@ The examples in this subdirectory showcase several GPU-implementations of financ @@ -10,7 +10,7 @@ The examples in this subdirectory showcase several GPU-implementations of financ
- [CMake](https://cmake.org/download/) (at least version 3.21)
- OR GNU Make - available via the distribution's package manager
- [ROCm](https://docs.amd.com/bundle/ROCm-Installation-Guide-v5.1.3/page/Overview_of_ROCm_Installation_Methods.html) (at least version 5.x.x)
- [ROCm](https://rocm.docs.amd.com/projects/HIP/en/latest/install/install.html) (at least version 6.x.x)
### Windows

114
Dockerfiles/hip-libraries-cuda-ubuntu.Dockerfile

@ -1,5 +1,13 @@ @@ -1,5 +1,13 @@
# syntax=docker/dockerfile:latest
# Above is required for substitutions in environment variables
# CUDA based docker image
FROM nvidia/cuda:12.0.0-devel-ubuntu20.04
FROM nvidia/cuda:12.2.0-devel-ubuntu22.04
# The ROCm version that this image is based on.
# Always write this down as major.minor.patch
ENV ROCM_VERSION=6.1.0
ENV ROCM_VERSION_APT=${ROCM_VERSION%.0}
# Base packages that are required for the installation
RUN export DEBIAN_FRONTEND=noninteractive; \
@ -19,17 +27,17 @@ RUN export DEBIAN_FRONTEND=noninteractive; \ @@ -19,17 +27,17 @@ RUN export DEBIAN_FRONTEND=noninteractive; \
vulkan-validationlayers \
libglfw3-dev \
gfortran \
# Nvidia driver version needed for hipSOLVER's CUDA backend.
# See https://docs.nvidia.com/deploy/cuda-compatibility/index.html#default-to-minor-version.
nvidia-driver-455 \
&& rm -rf /var/lib/apt/lists/*
# Install HIP using the installer script
# Install the HIP compiler and libraries from the ROCm repositories
RUN export DEBIAN_FRONTEND=noninteractive; \
wget -q -O - https://repo.radeon.com/rocm/rocm.gpg.key | apt-key add - \
&& echo 'deb [arch=amd64] https://repo.radeon.com/rocm/apt/6.0/ ubuntu main' > /etc/apt/sources.list.d/rocm.list \
mkdir -p /etc/apt/keyrings \
&& wget -q -O - https://repo.radeon.com/rocm/rocm.gpg.key | gpg --dearmor > /etc/apt/keyrings/rocm.gpg \
&& echo "deb [arch=amd64, signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/$ROCM_VERSION_APT/ jammy main" > /etc/apt/sources.list.d/rocm.list \
&& printf 'Package: *\nPin: origin "repo.radeon.com"\nPin-Priority: 9001\n' > /etc/apt/preferences.d/radeon.pref \
&& apt-get update -qq \
&& apt-get install -y hip-base hipify-clang rocm-core hipcc hip-dev
&& apt-get install -y hip-base hipify-clang rocm-core hipcc hip-dev rocm-llvm-dev \
&& rm -rf /var/lib/apt/lists/*
# Install CMake
RUN wget https://github.com/Kitware/CMake/releases/download/v3.21.7/cmake-3.21.7-linux-x86_64.sh \
@ -39,81 +47,87 @@ RUN wget https://github.com/Kitware/CMake/releases/download/v3.21.7/cmake-3.21.7 @@ -39,81 +47,87 @@ RUN wget https://github.com/Kitware/CMake/releases/download/v3.21.7/cmake-3.21.7
ENV PATH="/cmake/bin:/opt/rocm/bin:${PATH}"
ENV HIP_COMPILER=nvcc HIP_PLATFORM=nvidia HIP_RUNTIME=cuda
RUN echo "/opt/rocm/lib" >> /etc/ld.so.conf.d/rocm.conf \
&& ldconfig
ENV HIP_COMPILER=nvcc HIP_PLATFORM=nvidia HIP_RUNTIME=cuda
# Install rocRAND
RUN wget https://github.com/ROCm/rocRAND/archive/refs/tags/rocm-6.0.0.tar.gz \
&& tar -xf ./rocm-6.0.0.tar.gz \
&& rm ./rocm-6.0.0.tar.gz \
&& cmake -S ./rocRAND-rocm-6.0.0 -B ./rocRAND-rocm-6.0.0/build \
RUN wget https://github.com/ROCm/rocRAND/archive/refs/tags/rocm-${ROCM_VERSION}.tar.gz -O rocrand.tar.gz \
&& mkdir rocrand \
&& tar -xf ./rocrand.tar.gz --strip-components 1 -C rocrand \
&& rm ./rocrand.tar.gz \
&& cmake -S ./rocrand -B ./rocrand/build \
-D CMAKE_MODULE_PATH=/opt/rocm/lib/cmake/hip \
-D BUILD_HIPRAND=OFF \
-D CMAKE_INSTALL_PREFIX=/opt/rocm \
-D NVGPU_TARGETS="50" \
&& cmake --build ./rocRAND-rocm-6.0.0/build --target install \
&& rm -rf ./rocRAND-rocm-6.0.0
&& cmake --build ./rocrand/build --target install \
&& rm -rf ./rocrand
# Install hipCUB
RUN wget https://github.com/ROCm/hipCUB/archive/refs/tags/rocm-6.0.0.tar.gz \
&& tar -xf ./rocm-6.0.0.tar.gz \
&& rm ./rocm-6.0.0.tar.gz \
&& cmake -S ./hipCUB-rocm-6.0.0 -B ./hipCUB-rocm-6.0.0/build \
RUN wget https://github.com/ROCm/hipCUB/archive/refs/tags/rocm-${ROCM_VERSION}.tar.gz -O hipcub.tar.gz \
&& mkdir hipcub \
&& tar -xf ./hipcub.tar.gz --strip-components 1 -C hipcub \
&& rm ./hipcub.tar.gz \
&& cmake -S ./hipcub -B ./hipcub/build \
-D CMAKE_MODULE_PATH=/opt/rocm/lib/cmake/hip \
-D CMAKE_INSTALL_PREFIX=/opt/rocm \
&& cmake --build ./hipCUB-rocm-6.0.0/build --target install \
&& rm -rf ./hipCUB-rocm-6.0.0
&& cmake --build ./hipcub/build --target install \
&& rm -rf ./hipcub
# Install hipBLAS
# hipBLAS cmake for rocm-6.0.0 is broken; added CXXFLAGS=-D__HIP_PLATFORM_NVIDIA__ as a fix
RUN wget https://github.com/ROCm/hipBLAS/archive/refs/tags/rocm-6.0.0.tar.gz \
&& tar -xf ./rocm-6.0.0.tar.gz \
&& rm ./rocm-6.0.0.tar.gz \
&& CXXFLAGS=-D__HIP_PLATFORM_NVIDIA__ cmake -S ./hipBLAS-rocm-6.0.0 -B ./hipBLAS-rocm-6.0.0/build \
RUN wget https://github.com/ROCm/hipBLAS/archive/refs/tags/rocm-${ROCM_VERSION}.tar.gz -O hipblas.tar.gz \
&& mkdir hipblas \
&& tar -xf ./hipblas.tar.gz --strip-components 1 -C hipblas \
&& rm ./hipblas.tar.gz \
&& CXXFLAGS=-D__HIP_PLATFORM_NVIDIA__ cmake -S ./hipblas -B ./hipblas/build \
-D CMAKE_MODULE_PATH=/opt/rocm/lib/cmake/hip \
-D CMAKE_INSTALL_PREFIX=/opt/rocm \
-D USE_CUDA=ON \
&& cmake --build ./hipBLAS-rocm-6.0.0/build --target install \
&& rm -rf ./hipBLAS-rocm-6.0.0
&& cmake --build ./hipblas/build --target install \
&& rm -rf ./hipblas
# Install hipSOLVER
# hipSOLVER cmake for rocm-6.0.0 is broken added CXXFLAGS=-D__HIP_PLATFORM_NVIDIA__ as fix
RUN wget https://github.com/ROCm/hipSOLVER/archive/refs/tags/rocm-6.0.0.tar.gz \
&& tar -xf ./rocm-6.0.0.tar.gz \
&& rm ./rocm-6.0.0.tar.gz \
&& CXXFLAGS=-D__HIP_PLATFORM_NVIDIA__ cmake -S ./hipSOLVER-rocm-6.0.0 -B ./hipSOLVER-rocm-6.0.0/build \
# hipSOLVER cmake for rocm-6.1.0 is broken; added CXXFLAGS=-D__HIP_PLATFORM_NVIDIA__ as a fix
RUN wget https://github.com/ROCm/hipSOLVER/archive/refs/tags/rocm-${ROCM_VERSION}.tar.gz -O hipsolver.tar.gz \
&& mkdir hipsolver \
&& tar -xf ./hipsolver.tar.gz --strip-components 1 -C hipsolver \
&& rm ./hipsolver.tar.gz \
&& CXXFLAGS=-D__HIP_PLATFORM_NVIDIA__ cmake -S ./hipsolver -B ./hipsolver/build \
-D CMAKE_MODULE_PATH=/opt/rocm/lib/cmake/hip \
-D CMAKE_INSTALL_PREFIX=/opt/rocm \
-D USE_CUDA=ON \
&& cmake --build ./hipSOLVER-rocm-6.0.0/build --target install \
&& rm -rf ./hipSOLVER-rocm-6.0.0
&& cmake --build ./hipsolver/build --target install \
&& rm -rf ./hipsolver
# Install hipRAND
# Build from commit that removes deprecated macro use
RUN git clone https://github.com/ROCm/hipRAND.git hipRAND-rocm-6.0.0 \
&& cd hipRAND-rocm-6.0.0 \
&& git reset --hard 4925f0da96fad5b9f532ddc79f1f52fc279d329f \
&& cmake -S . -B ./build \
# Manually replace usage of __HIP_PLATFORM_NVCC__ with __HIP_PLATFORM_NVIDIA__. See
# https://github.com/ROCm/hipRAND/commit/4925f0da96fad5b9f532ddc79f1f52fc279d329f
RUN wget https://github.com/ROCm/hipRAND/archive/refs/tags/rocm-${ROCM_VERSION}.tar.gz -O hiprand.tar.gz \
&& mkdir hiprand \
&& tar -xf ./hiprand.tar.gz --strip-components 1 -C hiprand \
&& rm ./hiprand.tar.gz \
&& sed -i s/__HIP_PLATFORM_NVCC__/__HIP_PLATFORM_NVIDIA__/ ./hiprand/library/include/hiprand/hiprand.h \
&& cmake -S ./hiprand -B ./hiprand/build \
-D CMAKE_MODULE_PATH=/opt/rocm/lib/cmake/hip \
-D CMAKE_INSTALL_PREFIX=/opt/rocm \
-D BUILD_WITH_LIB=CUDA \
-D NVGPU_TARGETS="50" \
&& cmake --build ./build --target install \
&& cd .. \
&& rm -rf ./hipRAND-rocm-6.0.0
&& cmake --build ./hiprand/build --target install \
&& rm -rf ./hiprand
# Install hipFFT
RUN wget https://github.com/ROCm/hipFFT/archive/refs/tags/rocm-6.0.0.tar.gz \
&& tar -xf ./rocm-6.0.0.tar.gz \
&& rm ./rocm-6.0.0.tar.gz \
&& cmake -S ./hipFFT-rocm-6.0.0 -B ./hipFFT-rocm-6.0.0/build \
RUN wget https://github.com/ROCm/hipFFT/archive/refs/tags/rocm-${ROCM_VERSION}.tar.gz -O hipfft.tar.gz \
&& mkdir hipfft \
&& tar -xf ./hipfft.tar.gz --strip-components 1 -C hipfft \
&& rm ./hipfft.tar.gz \
&& cmake -S ./hipfft -B ./hipfft/build \
-D CMAKE_MODULE_PATH=/opt/rocm/lib/cmake/hip \
-D CMAKE_INSTALL_PREFIX=/opt/rocm \
-D BUILD_WITH_LIB=CUDA \
&& cmake --build ./hipFFT-rocm-6.0.0/build --target install \
&& rm -rf ./hipFFT-rocm-6.0.0
&& cmake --build ./hipfft/build --target install \
&& rm -rf ./hipfft
# Use render group as an argument from user
ARG GID=109

33
Dockerfiles/hip-libraries-rocm-ubuntu.Dockerfile

@ -1,5 +1,13 @@ @@ -1,5 +1,13 @@
# syntax=docker/dockerfile:latest
# Above is required for substitutions in environment variables
# Ubuntu based docker image
FROM ubuntu:20.04
FROM ubuntu:22.04
# The ROCm version that this image is based on.
# Always write this down as major.minor.patch
ENV ROCM_VERSION=6.1.0
ENV ROCM_VERSION_APT=${ROCM_VERSION%.0}
# Base packages that are required for the installation
RUN export DEBIAN_FRONTEND=noninteractive; \
@ -18,18 +26,29 @@ RUN export DEBIAN_FRONTEND=noninteractive; \ @@ -18,18 +26,29 @@ RUN export DEBIAN_FRONTEND=noninteractive; \
libvulkan-dev \
vulkan-validationlayers \
libglfw3-dev \
gnupg \
g++ \
&& rm -rf /var/lib/apt/lists/*
ENV LANG en_US.utf8
# Install ROCM HIP and libraries using the installer script
# Install the HIP compiler and libraries from the ROCm repositories
RUN export DEBIAN_FRONTEND=noninteractive; \
wget https://repo.radeon.com/amdgpu-install/6.0/ubuntu/focal/amdgpu-install_6.0.60000-1_all.deb \
mkdir -p /etc/apt/keyrings \
&& wget -q -O - https://repo.radeon.com/rocm/rocm.gpg.key | gpg --dearmor > /etc/apt/keyrings/rocm.gpg \
&& echo "deb [arch=amd64, signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/$ROCM_VERSION_APT/ jammy main" > /etc/apt/sources.list.d/rocm.list \
&& printf 'Package: *\nPin: origin "repo.radeon.com"\nPin-Priority: 9001\n' > /etc/apt/preferences.d/radeon.pref \
&& apt-get update -qq \
&& apt-get install -y ./amdgpu-install_6.0.60000-1_all.deb \
&& rm ./amdgpu-install_6.0.60000-1_all.deb\
&& amdgpu-install -y --usecase=hiplibsdk --no-dkms \
&& apt-get install -y libnuma-dev \
&& apt-get install --no-install-recommends -y \
hip-base hipify-clang rocm-core hipcc \
hip-dev rocm-hip-runtime-dev rocm-llvm-dev \
rocrand-dev hiprand-dev \
rocprim-dev hipcub-dev \
rocblas-dev hipblas-dev \
rocsolver-dev hipsolver-dev \
rocfft-dev hipfft-dev \
rocsparse-dev \
rocthrust-dev \
&& rm -rf /var/lib/apt/lists/*
# Install CMake

4
External/glad/glad.cpp vendored

@ -1867,8 +1867,8 @@ static void find_coreGL(void) @@ -1867,8 +1867,8 @@ static void find_coreGL(void)
{
/* Thank you @elmindreda
* https://github.com/elmindreda/greg/blob/master/templates/greg.c.in#L176
* https://github.com/glfw/glfw/blob/master/src/context.c#L36
* https://github.com/elmindreda/greg/blob/master/templates/greg.h.in
* https://github.com/glfw/glfw/blob/master/src/context.c
*/
int i, major, minor;

2
HIP-Basic/README.md

@ -10,7 +10,7 @@ The examples in this subdirectory showcase the functionality of the HIP runtime. @@ -10,7 +10,7 @@ The examples in this subdirectory showcase the functionality of the HIP runtime.
- [CMake](https://cmake.org/download/) (at least version 3.21)
- OR GNU Make - available via the distribution's package manager
- [ROCm](https://docs.amd.com/bundle/ROCm-Installation-Guide-v5.1.3/page/Overview_of_ROCm_Installation_Methods.html) (at least version 5.x.x)
- [ROCm](https://rocm.docs.amd.com/projects/HIP/en/latest/install/install.html) (at least version 6.x.x)
### Windows

2
HIP-Basic/cooperative_groups/README.md

@ -35,7 +35,7 @@ The partitioned threads can reside across multiple devices. @@ -35,7 +35,7 @@ The partitioned threads can reside across multiple devices.
- `thread_block`
- `tiled_partition<size>()`
- `thread_block_tile`
- All above from the [`cooperative_groups` namespace](https://github.com/ROCm-Developer-Tools/hipamd/blob/develop/include/hip/amd_detail/amd_hip_cooperative_groups.h)
- All above from the [`cooperative_groups` namespace](https://github.com/ROCm/clr/blob/develop/hipamd/include/hip/amd_detail/amd_hip_cooperative_groups.h)
#### Host symbols

4
HIP-Basic/device_query/README.md

@ -16,9 +16,9 @@ This example shows how the target platform and compiler can be identified, as we @@ -16,9 +16,9 @@ This example shows how the target platform and compiler can be identified, as we
## Key APIs and Concepts
- HIP code can target the AMD and the NVIDIA platform, and it can be compiled with different compilers. Compiler-defined macros can be used in HIP code to write code that is specific to a target or a compiler. See [HIP Programming Guide - Distinguishing Compiler Modes](https://docs.amd.com/bundle/HIP-Programming-Guide-v5.2/page/Transitioning_from_CUDA_to_HIP.html#d4438e664) for more details.
- HIP code can target the AMD and the NVIDIA platform, and it can be compiled with different compilers. Compiler-defined macros can be used in HIP code to write code that is specific to a target or a compiler. See [HIP Programming Guide - Distinguishing Compiler Modes](https://rocm.docs.amd.com/projects/HIP/en/latest/how-to/hip_porting_guide.html#distinguishing-compiler-modes) for more details.
- `hipGetDeviceCount` returns the number of devices in the system. Some device management API functions take an identifier for each device, which is a monotonically incrementing number starting from zero. Others require the active device to be set, with `hipSetDevice`. A full overview of the device management API can be found at [HIP API - Device Management](https://docs.amd.com/bundle/HIP_API_Guide/page/group___device.html).
- `hipGetDeviceCount` returns the number of devices in the system. Some device management API functions take an identifier for each device, which is a monotonically incrementing number starting from zero. Others require the active device to be set, with `hipSetDevice`. A full overview of the device management API can be found at [HIP API - Device Management](https://rocm.docs.amd.com/projects/HIP/en/latest/doxygen/html/group___device.html).
## Demonstrated API Calls

4
HIP-Basic/device_query/main.cpp

@ -1,6 +1,6 @@ @@ -1,6 +1,6 @@
// MIT License
//
// Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
@ -63,7 +63,7 @@ void print_device_properties(int device_id) @@ -63,7 +63,7 @@ void print_device_properties(int device_id)
HIP_CHECK(hipGetDeviceProperties(&props, device_id));
// Print a small set of all available properties. A full list can be found at:
// https://docs.amd.com/bundle/HIP_API_Guide/page/structhip_device_prop__t.html
// https://rocm.docs.amd.com/projects/HIP/en/latest/doxygen/html/structhip_device_prop__t.html
std::cout << std::setw(col_w) << "Name: " << props.name << '\n';
std::cout << std::setw(col_w)
<< "totalGlobalMem: " << double_precision(bytes_to_gib(props.totalGlobalMem), 2, true)

2
HIP-Basic/multi_gpu_data_transfer/README.md

@ -32,7 +32,7 @@ In this example, the result of a matrix transpose kernel execution on one device @@ -32,7 +32,7 @@ In this example, the result of a matrix transpose kernel execution on one device
- With `hipMemcpy` data bytes can be transferred from host to device (using `hipMemcpyHostToDevice`), from device to host (using `hipMemcpyDeviceToHost`) or from device to device (using `hipMemcpyDeviceToDevice`). The latter will only work if P2P communication has been enabled from the destination to the source device.
- `myKernelName<<<...>>>` queues the execution of a kernel on the current device and `hipDeviceSynchronize` makes the host wait on all active streams on the current device. In this example `hipDeviceSynchronize` is necessary because the second device needs the results obtained from the previous kernel execution on the first device.
- `hipDeviceReset` discards the state of the current device and updates it to a fresh one. It also frees all the resources (e.g. streams, events, ...) associated with the current device.
- It's a [known issue with multi-GPU environments](https://community.amd.com/t5/knowledge-base/iommu-advisory-for-multi-gpu-environments/ta-p/477468) that some multi-GPU environments fail due to limitations of the IOMMU enablement, so it may be needed to explicitly enable/disable the IOMMU using the kernel command-line parameter `iommu=pt/off`.
- It's a [known issue with multi-GPU environments](https://community.amd.com/t5/knowledge-base/iommu-advisory-for-amd-instinct/ta-p/484601) that some multi-GPU environments fail due to limitations of the IOMMU enablement, so it may be necessary to explicitly enable/disable the IOMMU using the kernel command-line parameter `iommu=pt/off`.
## Demonstrated API Calls

13
HIP-Basic/texture_management/main.hip

@ -1,6 +1,6 @@ @@ -1,6 +1,6 @@
// MIT License
//
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
@ -47,8 +47,17 @@ __global__ void histogram_kernel(unsigned int* histogram, @@ -47,8 +47,17 @@ __global__ void histogram_kernel(unsigned int* histogram,
float u = x / static_cast<float>(size_x) + .5f;
float v = y / static_cast<float>(size_y) + .5f;
// Read the value from the texture.
#if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT
// Read the value from the texture, if supported.
unsigned char val = tex2D<unsigned char>(tex_obj, u, v);
#else
// Prevent compile errors on HIP architectures that do not support
// texture instructions.
(void)u;
(void)v;
(void)tex_obj;
unsigned char val = 0;
#endif
// Determine the histogram bin and write to global memory.
unsigned int bin_range = ceiling_div(256, hist_bin_count);

2
Libraries/hipBLAS/README.md

@ -10,7 +10,7 @@ The examples in this subdirectory showcase the functionality of the [hipBLAS](ht @@ -10,7 +10,7 @@ The examples in this subdirectory showcase the functionality of the [hipBLAS](ht
- [CMake](https://cmake.org/download/) (at least version 3.21)
- OR GNU Make - available via the distribution's package manager
- [ROCm](https://docs.amd.com/bundle/ROCm-Installation-Guide-v5.2/page/Overview_of_ROCm_Installation_Methods.html) (at least version 5.x.x)
- [ROCm](https://rocm.docs.amd.com/projects/HIP/en/latest/install/install.html) (at least version 6.x.x)
- [hipBLAS](https://github.com/ROCmSoftwarePlatform/hipBLAS): `hipblas` package available from [repo.radeon.com](https://repo.radeon.com/rocm/).
### Windows

4
Libraries/hipCUB/README.md

@ -10,9 +10,9 @@ The examples in this subdirectory showcase the functionality of the [hipCUB](htt @@ -10,9 +10,9 @@ The examples in this subdirectory showcase the functionality of the [hipCUB](htt
- [CMake](https://cmake.org/download/) (at least version 3.21)
- OR GNU Make - available via the distribution's package manager
- [ROCm](https://docs.amd.com/bundle/ROCm-Installation-Guide-v5.1.3/page/Overview_of_ROCm_Installation_Methods.html) (at least version 5.x.x)
- [ROCm](https://rocm.docs.amd.com/projects/HIP/en/latest/install/install.html) (at least version 6.x.x)
- [hipCUB](https://github.com/ROCmSoftwarePlatform/hipCUB)
- ROCm platform: `hipCUB-dev` package available from [repo.radeon.com](https://repo.radeon.com/rocm/). The repository is added during the standard ROCm [install procedure](https://docs.amd.com/bundle/ROCm-Installation-Guide-v5.1.3/page/How_to_Install_ROCm.html).
- ROCm platform: `hipCUB-dev` package available from [repo.radeon.com](https://repo.radeon.com/rocm/). The repository is added during the standard ROCm [install procedure](https://rocm.docs.amd.com/projects/HIP/en/latest/install/install.html).
- CUDA platform: Install hipCUB from source: [instructions](https://github.com/ROCmSoftwarePlatform/hipCUB#build-and-install).
- [CUB](https://github.com/NVIDIA/cub) is a dependency of hipCUB for NVIDIA platforms. CUB is part of the NVIDIA CUDA Toolkit.

4
Libraries/hipSOLVER/README.md

@ -10,8 +10,8 @@ The examples in this subdirectory showcase the functionality of the [hipSOLVER]( @@ -10,8 +10,8 @@ The examples in this subdirectory showcase the functionality of the [hipSOLVER](
- [CMake](https://cmake.org/download/) (at least version 3.21)
- OR GNU Make - available via the distribution's package manager
- [ROCm](https://docs.amd.com/bundle/ROCm-Installation-Guide-v5.2/page/Overview_of_ROCm_Installation_Methods.html) (at least version 5.x.x)
- [hipSOLVER](https://github.com/ROCmSoftwarePlatform/hipSOLVER): `hipsolver` package available from [repo.radeon.com](https://repo.radeon.com/rocm/). The repository is added during the standard ROCm [install procedure](https://docs.amd.com/bundle/ROCm-Installation-Guide-v5.2/page/How_to_Install_ROCm.html).
- [ROCm](https://rocm.docs.amd.com/projects/HIP/en/latest/install/install.html) (at least version 6.x.x)
- [hipSOLVER](https://github.com/ROCmSoftwarePlatform/hipSOLVER): `hipsolver` package available from [repo.radeon.com](https://repo.radeon.com/rocm/). The repository is added during the standard ROCm [install procedure](https://rocm.docs.amd.com/projects/HIP/en/latest/install/install.html).
### Windows

2
Libraries/hipSOLVER/syevdx/README.md

@ -2,7 +2,7 @@ @@ -2,7 +2,7 @@
## Description
This example illustrates how to solve the standard symmetric-definite eigenvalue problem for a symmetric matrix $A$ using hipSOLVER's [Compatibility API](https://hipsolver.readthedocs.io/en/rocm-5.4.4/compat_index.html). This API offers wrapper functions for the ones existing in hipSOLVER (and their equivalents in [cuSolverDN](https://docs.nvidia.com/cuda/cusolver/index.html#cusolverdn-dense-lapack)) and is intended to be used when porting cuSOLVER applications to hipSOLVER ones. The main advantage of this API is that its functions follow the same method signature format as cuSolverDN's, which makes easier the port.
This example illustrates how to solve the standard symmetric-definite eigenvalue problem for a symmetric matrix $A$ using hipSOLVER's [Compatibility API](https://rocm.docs.amd.com/projects/hipSOLVER/en/latest/reference/compat-api/lapacklike.html). This API offers wrapper functions for the ones existing in hipSOLVER (and their equivalents in [cuSolverDN](https://docs.nvidia.com/cuda/cusolver/index.html#cusolverdn-dense-lapack)) and is intended to be used when porting cuSOLVER applications to hipSOLVER ones. The main advantage of this API is that its functions follow the same method signature format as cuSolverDN's, which makes porting easier.
Given an $n \times n$ symmetric matrix $A$, the problem consists of solving the following equation:

4
Libraries/rocBLAS/README.md

@ -10,8 +10,8 @@ The examples in this subdirectory showcase the functionality of the [rocBLAS](ht @@ -10,8 +10,8 @@ The examples in this subdirectory showcase the functionality of the [rocBLAS](ht
- [CMake](https://cmake.org/download/) (at least version 3.21)
- OR GNU Make - available via the distribution's package manager
- [ROCm](https://docs.amd.com/bundle/ROCm-Installation-Guide-v5.2/page/Overview_of_ROCm_Installation_Methods.html) (at least version 5.x.x)
- [rocBLAS](https://github.com/ROCmSoftwarePlatform/rocBLAS): `rocblas` package available from [repo.radeon.com](https://repo.radeon.com/rocm/). The repository is added during the standard ROCm [install procedure](https://docs.amd.com/bundle/ROCm-Installation-Guide-v5.2/page/How_to_Install_ROCm.html).
- [ROCm](https://rocm.docs.amd.com/projects/HIP/en/latest/install/install.html) (at least version 6.x.x)
- [rocBLAS](https://github.com/ROCmSoftwarePlatform/rocBLAS): `rocblas` package available from [repo.radeon.com](https://repo.radeon.com/rocm/). The repository is added during the standard ROCm [install procedure](https://rocm.docs.amd.com/projects/HIP/en/latest/install/install.html).
### Windows

4
Libraries/rocPRIM/README.md

@ -10,9 +10,9 @@ The examples in this subdirectory showcase the functionality of the [rocPRIM](ht @@ -10,9 +10,9 @@ The examples in this subdirectory showcase the functionality of the [rocPRIM](ht
- [CMake](https://cmake.org/download/) (at least version 3.21)
- OR GNU Make - available via the distribution's package manager
- [ROCm](https://docs.amd.com/bundle/ROCm-Installation-Guide-v5.1.3/page/Overview_of_ROCm_Installation_Methods.html) (at least version 5.x.x)
- [ROCm](https://rocm.docs.amd.com/projects/HIP/en/latest/install/install.html) (at least version 6.x.x)
- [rocPRIM](https://github.com/ROCmSoftwarePlatform/rocPRIM)
- `rocPRIM-dev` package available from [repo.radeon.com](https://repo.radeon.com/rocm/). The repository is added during the standard ROCm [install procedure](https://docs.amd.com/bundle/ROCm-Installation-Guide-v5.1.3/page/How_to_Install_ROCm.html).
- `rocPRIM-dev` package available from [repo.radeon.com](https://repo.radeon.com/rocm/). The repository is added during the standard ROCm [install procedure](https://rocm.docs.amd.com/projects/HIP/en/latest/install/install.html).
### Windows

4
Libraries/rocRAND/README.md

@ -10,9 +10,9 @@ The examples in this subdirectory showcase the functionality of the [rocRAND](ht @@ -10,9 +10,9 @@ The examples in this subdirectory showcase the functionality of the [rocRAND](ht
- [CMake](https://cmake.org/download/) (at least version 3.21)
- OR GNU Make - available via the distribution's package manager
- [ROCm](https://docs.amd.com/bundle/ROCm-Installation-Guide-v5.1.3/page/Overview_of_ROCm_Installation_Methods.html) (at least version 5.x.x) OR the HIP Nvidia runtime (on the CUDA platform)
- [ROCm](https://rocm.docs.amd.com/projects/HIP/en/latest/install/install.html) (at least version 6.x.x) OR the HIP Nvidia runtime (on the CUDA platform)
- [rocRAND](https://github.com/rocmSoftwarePlatform/rocRAND)
- ROCm platform: `rocrand-dev` package available from [repo.radeon.com](https://repo.radeon.com/rocm/). The repository is added during the standard ROCm [install procedure](https://docs.amd.com/bundle/ROCm-Installation-Guide-v5.1.3/page/How_to_Install_ROCm.html).
- ROCm platform: `rocrand-dev` package available from [repo.radeon.com](https://repo.radeon.com/rocm/). The repository is added during the standard ROCm [install procedure](https://rocm.docs.amd.com/projects/HIP/en/latest/install/install.html).
- CUDA platform: Install rocRAND from source: [instructions](https://github.com/rocmSoftwarePlatform/rocRAND#build-and-install).
### Windows

8
Libraries/rocSPARSE/README.md

@ -16,13 +16,13 @@ All rocSPARSE library functions, unless otherwise stated, are non blocking and e @@ -16,13 +16,13 @@ All rocSPARSE library functions, unless otherwise stated, are non blocking and e
- OR GNU Make - available via the distribution's package manager
- [ROCm](https://docs.amd.com/bundle/ROCm-Installation-Guide-v5.1.3/page/Overview_of_ROCm_Installation_Methods.html) (at least version 5.x.x) OR the HIP Nvidia runtime (on the CUDA platform)
- [ROCm](https://rocm.docs.amd.com/projects/HIP/en/latest/install/install.html) (at least version 6.x.x) OR the HIP Nvidia runtime (on the CUDA platform)
- [rocSPARSE](https://github.com/rocmSoftwarePlatform/rocSPARSE)
- ROCm platform: `rocsparse` package available from [repo.radeon.com](https://repo.radeon.com/rocm/). The repository is added during the standard ROCm [install procedure](https://docs.amd.com/bundle/ROCm-Installation-Guide-v5.1.3/page/How_to_Install_ROCm.html).
- ROCm platform: `rocsparse` package available from [repo.radeon.com](https://repo.radeon.com/rocm/). The repository is added during the standard ROCm [install procedure](https://rocm.docs.amd.com/projects/HIP/en/latest/install/install.html).
- CUDA platform: Install rocSPARSE from source: [instructions](https://rocsparse.readthedocs.io/en/rocm-5.5.0/usermanual.html#building-rocsparse-from-source).
- CUDA platform: Install rocSPARSE from source: [instructions](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/install/Linux_Install_Guide.html).
### Windows
@ -35,7 +35,7 @@ All rocSPARSE library functions, unless otherwise stated, are non blocking and e @@ -35,7 +35,7 @@ All rocSPARSE library functions, unless otherwise stated, are non blocking and e
- [rocSPARSE](https://github.com/rocmSoftwarePlatform/rocSPARSE)
- ROCm platform: Installed as part of the ROCm SDK on Windows.
- CUDA platform: Install rocSPARSE from source: [instructions](https://rocsparse.readthedocs.io/en/rocm-5.5.0/usermanual.html#building-rocsparse-from-source).
- CUDA platform: Install rocSPARSE from source: [instructions](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/install/Linux_Install_Guide.html).
- [CMake](https://cmake.org/download/) (optional, to build with CMake. Requires at least version 3.21)

2
Libraries/rocSPARSE/level_2/bsrmv/README.md

@ -29,7 +29,7 @@ where @@ -29,7 +29,7 @@ where
### BSR Matrix Storage Format
The [Block Compressed Sparse Row (BSR) storage format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#bsr-storage-format) describes a sparse matrix using three arrays. The idea behind this storage format is to split the given sparse matrix into equal sized blocks of dimension `bsr_dim` and store those using the [CSR format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#csr-storage-format). Because the CSR format only stores non-zero elements, the BSR format introduces the concept of __non-zero block__: a block that contains at least one non-zero element. Note that all elements of non-zero blocks are stored, even if some of them are equal to zero.
The [Block Compressed Sparse Row (BSR) storage format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#bsr-storage-format) describes a sparse matrix using three arrays. The idea behind this storage format is to split the given sparse matrix into equal sized blocks of dimension `bsr_dim` and store those using the [CSR format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#csr-storage-format). Because the CSR format only stores non-zero elements, the BSR format introduces the concept of __non-zero block__: a block that contains at least one non-zero element. Note that all elements of non-zero blocks are stored, even if some of them are equal to zero.
Therefore, defining

2
Libraries/rocSPARSE/level_2/bsrsv/README.md

@ -39,7 +39,7 @@ Obtaining the solution for such a system consists of finding concrete values of @@ -39,7 +39,7 @@ Obtaining the solution for such a system consists of finding concrete values of
### BSR Matrix Storage Format
The [Block Compressed Sparse Row (BSR) storage format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#bsr-storage-format) describes a sparse matrix using three arrays. The idea behind this storage format is to split the given sparse matrix into equal sized blocks of dimension `bsr_dim` and store those using the [CSR format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#csr-storage-format). Because the CSR format only stores non-zero elements, the BSR format introduces the concept of __non-zero block__: a block that contains at least one non-zero element. Note that all elements of non-zero blocks are stored, even if some of them are equal to zero.
The [Block Compressed Sparse Row (BSR) storage format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#bsr-storage-format) describes a sparse matrix using three arrays. The idea behind this storage format is to split the given sparse matrix into equal sized blocks of dimension `bsr_dim` and store those using the [CSR format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#csr-storage-format). Because the CSR format only stores non-zero elements, the BSR format introduces the concept of __non-zero block__: a block that contains at least one non-zero element. Note that all elements of non-zero blocks are stored, even if some of them are equal to zero.
Therefore, defining

2
Libraries/rocSPARSE/level_2/bsrxmv/README.md

@ -32,7 +32,7 @@ otherwise it returns the identical $\mathbf{y}$ vector elements. @@ -32,7 +32,7 @@ otherwise it returns the identical $\mathbf{y}$ vector elements.
### BSR Matrix Storage Format
The [Block Compressed Sparse Row (BSR) storage format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#bsr-storage-format) describes a sparse matrix using three arrays. The idea behind this storage format is to split the given sparse matrix into equal sized blocks of dimension `bsr_dim` and store those using the [CSR format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#csr-storage-format). Because the CSR format only stores non-zero elements, the BSR format introduces the concept of __non-zero block__: a block that contains at least one non-zero element. Note that all elements of non-zero blocks are stored, even if some of them are equal to zero.
The [Block Compressed Sparse Row (BSR) storage format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#bsr-storage-format) describes a sparse matrix using three arrays. The idea behind this storage format is to split the given sparse matrix into equal sized blocks of dimension `bsr_dim` and store those using the [CSR format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#csr-storage-format). Because the CSR format only stores non-zero elements, the BSR format introduces the concept of __non-zero block__: a block that contains at least one non-zero element. Note that all elements of non-zero blocks are stored, even if some of them are equal to zero.
Therefore, defining

2
Libraries/rocSPARSE/level_2/csritsv/README.md

@ -40,7 +40,7 @@ Obtaining solution for such a system consists on finding concrete values of all @@ -40,7 +40,7 @@ Obtaining solution for such a system consists on finding concrete values of all
### CSR Matrix Storage Format
The [Compressed Sparse Row (CSR) storage format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#csr-storage-format) describes an $m \times n$ sparse matrix with three arrays.
The [Compressed Sparse Row (CSR) storage format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#csr-storage-format) describes an $m \times n$ sparse matrix with three arrays.
Defining

2
Libraries/rocSPARSE/level_2/csrmv/README.md

@ -29,7 +29,7 @@ where @@ -29,7 +29,7 @@ where
### CSR Matrix Storage Format
The [Compressed Sparse Row (CSR) storage format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#csr-storage-format) describes an $m \times n$ sparse matrix with three arrays.
The [Compressed Sparse Row (CSR) storage format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#csr-storage-format) describes an $m \times n$ sparse matrix with three arrays.
Defining

2
Libraries/rocSPARSE/level_2/csrsv/README.md

@ -39,7 +39,7 @@ Obtaining solution for such a system consists on finding concrete values of all @@ -39,7 +39,7 @@ Obtaining solution for such a system consists on finding concrete values of all
### CSR Matrix Storage Format
The [Compressed Sparse Row (CSR) storage format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#csr-storage-format) describes an $m \times n$ sparse matrix with three arrays.
The [Compressed Sparse Row (CSR) storage format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#csr-storage-format) describes an $m \times n$ sparse matrix with three arrays.
Defining

2
Libraries/rocSPARSE/level_2/gebsrmv/README.md

@ -33,7 +33,7 @@ where @@ -33,7 +33,7 @@ where
### GEBSR Matrix Storage Format
The [General Block Compressed Sparse Row (GEBSR) storage format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#gebsr-storage-format) describes a sparse matrix using three arrays. The idea behind this storage format is the same as for the BSR format, but the blocks in which the sparse matrix is split are not squared. All of them are of `bsr_row_dim` $\times$ `bsr_col_dim` size.
The [General Block Compressed Sparse Row (GEBSR) storage format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#gebsr-storage-format) describes a sparse matrix using three arrays. The idea behind this storage format is the same as for the BSR format, but the blocks in which the sparse matrix is split are not square. All of them are of `bsr_row_dim` $\times$ `bsr_col_dim` size.
Therefore, defining

2
Libraries/rocSPARSE/level_3/bsrmm/README.md

@ -30,7 +30,7 @@ where @@ -30,7 +30,7 @@ where
### BSR Matrix Storage Format
The [Block Compressed Sparse Row (BSR) storage format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#bsr-storage-format) describes a sparse matrix using three arrays. The idea behind this storage format is to split the given sparse matrix into equal sized blocks of dimension `bsr_dim` and store those using the [CSR format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#csr-storage-format). Because the CSR format only stores non-zero elements, the BSR format introduces the concept of __non-zero block__: a block that contains at least one non-zero element. Note that all elements of non-zero blocks are stored, even if some of them are equal to zero.
The [Block Compressed Sparse Row (BSR) storage format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#bsr-storage-format) describes a sparse matrix using three arrays. The idea behind this storage format is to split the given sparse matrix into equal sized blocks of dimension `bsr_dim` and store those using the [CSR format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#csr-storage-format). Because the CSR format only stores non-zero elements, the BSR format introduces the concept of __non-zero block__: a block that contains at least one non-zero element. Note that all elements of non-zero blocks are stored, even if some of them are equal to zero.
Therefore, defining

2
Libraries/rocSPARSE/level_3/bsrsm/README.md

@ -42,7 +42,7 @@ This is the same as solving the classical system of linear equations $op_a(A) x_ @@ -42,7 +42,7 @@ This is the same as solving the classical system of linear equations $op_a(A) x_
### BSR Matrix Storage Format
The [Block Compressed Sparse Row (BSR) storage format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#bsr-storage-format) describes a sparse matrix using three arrays. The idea behind this storage format is to split the given sparse matrix into equal sized blocks of dimension `bsr_dim` and store those using the [CSR format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#csr-storage-format). Because the CSR format only stores non-zero elements, the BSR format introduces the concept of __non-zero block__: a block that contains at least one non-zero element. Note that all elements of non-zero blocks are stored, even if some of them are equal to zero.
The [Block Compressed Sparse Row (BSR) storage format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#bsr-storage-format) describes a sparse matrix using three arrays. The idea behind this storage format is to split the given sparse matrix into equal sized blocks of dimension `bsr_dim` and store those using the [CSR format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#csr-storage-format). Because the CSR format only stores non-zero elements, the BSR format introduces the concept of __non-zero block__: a block that contains at least one non-zero element. Note that all elements of non-zero blocks are stored, even if some of them are equal to zero.
Therefore, defining

2
Libraries/rocSPARSE/level_3/csrmm/README.md

@ -30,7 +30,7 @@ where @@ -30,7 +30,7 @@ where
### CSR Matrix Storage Format
The [Compressed Sparse Row (CSR) storage format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#csr-storage-format) describes an $m \times n$ sparse matrix with three arrays.
The [Compressed Sparse Row (CSR) storage format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#csr-storage-format) describes an $m \times n$ sparse matrix with three arrays.
Defining

2
Libraries/rocSPARSE/level_3/csrsm/README.md

@ -43,7 +43,7 @@ This is the same as solving the classical system of linear equations $op_a(A) x_ @@ -43,7 +43,7 @@ This is the same as solving the classical system of linear equations $op_a(A) x_
### CSR Matrix Storage Format
The [Compressed Sparse Row (CSR) storage format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#csr-storage-format) describes an $m \times n$ sparse matrix with three arrays.
The [Compressed Sparse Row (CSR) storage format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#csr-storage-format) describes an $m \times n$ sparse matrix with three arrays.
Defining

2
Libraries/rocSPARSE/level_3/gebsrmm/README.md

@ -30,7 +30,7 @@ where @@ -30,7 +30,7 @@ where
### GEBSR Matrix Storage Format
The [General Block Compressed Sparse Row (GEBSR) storage format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#gebsr-storage-format) describes a sparse matrix using three arrays. The idea behind this storage format is the same as for the BSR format, but the blocks in which the sparse matrix is split are not squared. All of them are of `bsr_row_dim` $\times$ `bsr_col_dim` size.
The [General Block Compressed Sparse Row (GEBSR) storage format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#gebsr-storage-format) describes a sparse matrix using three arrays. The idea behind this storage format is the same as for the BSR format, but the blocks in which the sparse matrix is split are not square. All of them are of `bsr_row_dim` $\times$ `bsr_col_dim` size.
Therefore, defining

2
Libraries/rocSPARSE/level_3/gemmi/README.md

@ -36,7 +36,7 @@ where @@ -36,7 +36,7 @@ where
### CSR Matrix Storage Format
The [Compressed Sparse Row (CSR) storage format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#csr-storage-format) describes an $m \times n$ sparse matrix with three arrays.
The [Compressed Sparse Row (CSR) storage format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#csr-storage-format) describes an $m \times n$ sparse matrix with three arrays.
Defining

2
Libraries/rocSPARSE/level_3/sddmm/README.md

@ -35,7 +35,7 @@ where @@ -35,7 +35,7 @@ where
### CSR Matrix Storage Format
The [Compressed Sparse Row (CSR) storage format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#csr-storage-format) describes an $m \times n$ sparse matrix with three arrays.
The [Compressed Sparse Row (CSR) storage format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#csr-storage-format) describes an $m \times n$ sparse matrix with three arrays.
Defining

2
Libraries/rocSPARSE/level_3/spsm/README.md

@ -35,7 +35,7 @@ where @@ -35,7 +35,7 @@ where
### CSR Matrix Storage Format
The [Compressed Sparse Row (CSR) storage format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#csr-storage-format) describes an $m \times n$ sparse matrix with three arrays.
The [Compressed Sparse Row (CSR) storage format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#csr-storage-format) describes an $m \times n$ sparse matrix with three arrays.
Defining

2
Libraries/rocSPARSE/preconditioner/bsric0/README.md

@ -29,7 +29,7 @@ $$A \approx L \cdot L^H.$$ @@ -29,7 +29,7 @@ $$A \approx L \cdot L^H.$$
### BSR Matrix Storage Format
The [Block Compressed Sparse Row (BSR) storage format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#bsr-storage-format) describes a sparse matrix using three arrays. The idea behind this storage format is to split the given sparse matrix into equal sized blocks of dimension `bsr_dim` and store those using the [CSR format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#csr-storage-format). Because the CSR format only stores non-zero elements, the BSR format introduces the concept of __non-zero block__: a block that contains at least one non-zero element. Note that all elements of non-zero blocks are stored, even if some of them are equal to zero.
The [Block Compressed Sparse Row (BSR) storage format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#bsr-storage-format) describes a sparse matrix using three arrays. The idea behind this storage format is to split the given sparse matrix into equal sized blocks of dimension `bsr_dim` and store those using the [CSR format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#csr-storage-format). Because the CSR format only stores non-zero elements, the BSR format introduces the concept of __non-zero block__: a block that contains at least one non-zero element. Note that all elements of non-zero blocks are stored, even if some of them are equal to zero.
Therefore, defining

2
Libraries/rocSPARSE/preconditioner/bsrilu0/README.md

@ -27,7 +27,7 @@ $$A \approx L \cdot U.$$ @@ -27,7 +27,7 @@ $$A \approx L \cdot U.$$
### BSR Matrix Storage Format
The [Block Compressed Sparse Row (BSR) storage format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#bsr-storage-format) describes a sparse matrix using three arrays. The idea behind this storage format is to split the given sparse matrix into equal sized blocks of dimension `bsr_dim` and store those using the [CSR format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#csr-storage-format). Because the CSR format only stores non-zero elements, the BSR format introduces the concept of __non-zero block__: a block that contains at least one non-zero element. Note that all elements of non-zero blocks are stored, even if some of them are equal to zero.
The [Block Compressed Sparse Row (BSR) storage format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#bsr-storage-format) describes a sparse matrix using three arrays. The idea behind this storage format is to split the given sparse matrix into equal sized blocks of dimension `bsr_dim` and store those using the [CSR format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#csr-storage-format). Because the CSR format only stores non-zero elements, the BSR format introduces the concept of __non-zero block__: a block that contains at least one non-zero element. Note that all elements of non-zero blocks are stored, even if some of them are equal to zero.
Therefore, defining

2
Libraries/rocSPARSE/preconditioner/csric0/README.md

@ -29,7 +29,7 @@ $$A \approx L \cdot L^H.$$ @@ -29,7 +29,7 @@ $$A \approx L \cdot L^H.$$
### CSR Matrix Storage Format
The [Compressed Sparse Row (CSR) storage format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#csr-storage-format) describes an $m \times n$ sparse matrix with three arrays.
The [Compressed Sparse Row (CSR) storage format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#csr-storage-format) describes an $m \times n$ sparse matrix with three arrays.
Defining

2
Libraries/rocSPARSE/preconditioner/csrilu0/README.md

@ -27,7 +27,7 @@ $$A \approx L \cdot U.$$ @@ -27,7 +27,7 @@ $$A \approx L \cdot U.$$
### CSR Matrix Storage Format
The [Compressed Sparse Row (CSR) storage format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#csr-storage-format) describes an $m \times n$ sparse matrix with three arrays.
The [Compressed Sparse Row (CSR) storage format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#csr-storage-format) describes an $m \times n$ sparse matrix with three arrays.
Defining

2
Libraries/rocSPARSE/preconditioner/csritilu0/README.md

@ -28,7 +28,7 @@ $$A \approx L \cdot U.$$ @@ -28,7 +28,7 @@ $$A \approx L \cdot U.$$
### CSR Matrix Storage Format
The [Compressed Sparse Row (CSR) storage format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#csr-storage-format) describes an $m \times n$ sparse matrix with three arrays.
The [Compressed Sparse Row (CSR) storage format](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#csr-storage-format) describes an $m \times n$ sparse matrix with three arrays.
Defining

4
Libraries/rocThrust/README.md

@ -10,8 +10,8 @@ The examples in this subdirectory showcase the functionality of the [rocThrust]( @@ -10,8 +10,8 @@ The examples in this subdirectory showcase the functionality of the [rocThrust](
- [CMake](https://cmake.org/download/) (at least version 3.21)
- OR GNU Make - available via the distribution's package manager
- [ROCm](https://docs.amd.com/bundle/ROCm-Installation-Guide-v5.2/page/Overview_of_ROCm_Installation_Methods.html) (at least version 5.x.x)
- [rocThrust](https://github.com/rocmSoftwarePlatform/rocThrust): `rocthrust-dev` package available from [repo.radeon.com](https://repo.radeon.com/rocm/). The repository is added during the standard ROCm [install procedure](https://docs.amd.com/bundle/ROCm-Installation-Guide-v5.2/page/How_to_Install_ROCm.html).
- [ROCm](https://rocm.docs.amd.com/projects/HIP/en/latest/install/install.html) (at least version 6.x.x)
- [rocThrust](https://github.com/rocmSoftwarePlatform/rocThrust): `rocthrust-dev` package available from [repo.radeon.com](https://repo.radeon.com/rocm/). The repository is added during the standard ROCm [install procedure](https://rocm.docs.amd.com/projects/HIP/en/latest/install/install.html).
### Windows

2
README.md

@ -219,7 +219,7 @@ A collection of examples to enable new users to start using ROCm. Advanced users @@ -219,7 +219,7 @@ A collection of examples to enable new users to start using ROCm. Advanced users
- [CMake](https://cmake.org/download/) (at least version 3.21)
- A number of examples also support building via GNU Make - available through the distribution's package manager
- [ROCm](https://docs.amd.com/bundle/ROCm-Installation-Guide-v5.1.3/page/Overview_of_ROCm_Installation_Methods.html) (at least version 5.x.x)
- [ROCm](https://rocm.docs.amd.com/projects/HIP/en/latest/install/install.html) (at least version 6.x.x)
- For example-specific prerequisites, see the example subdirectories.
### Windows

Loading…
Cancel
Save