Browse Source

Develop Stream: update to ROCm 6.2 (#165)

* update docker images and CI to ROCm 6.2.0

* fix rocfft brick bounds parameter order

* update formatting for rocm 6.2

---------

Co-authored-by: Robin Voetter <robin@streamhpc.com>
pull/167/head rocm-test-09212024
Beatriz Navidad Vilches 10 months ago committed by GitHub
parent
commit
0fdfd7f6e7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 2
      .gitlab-ci.yml
  2. 2
      Common/example_utils.hpp
  3. 14
      Dockerfiles/hip-libraries-cuda-ubuntu.Dockerfile
  4. 2
      Dockerfiles/hip-libraries-rocm-ubuntu.Dockerfile
  5. 4
      Libraries/hipFFT/plan_z2z/main.cpp
  6. 17
      Libraries/rocFFT/multi_gpu/main.cpp
  7. 5
      Libraries/rocRAND/simple_distributions_cpp/main.cpp

2
.gitlab-ci.yml

@@ -39,7 +39,7 @@ variables:
HIP_FLAGS: "-Wno-unused-command-line-argument -Wall -Wextra -Werror"
# Keep in sync with ROCM_VERSION in Dockerfiles/hip-libraries-cuda-ubuntu.Dockerfile
# and Dockerfiles/hip-libraries-rocm-ubuntu.Dockerfile
DOCKER_ROCM_VERSION: 6.1.0
DOCKER_ROCM_VERSION: 6.2.0
DOCKER_HIP_LIBRARIES_ROCM_TAG: rocm-ubuntu-${DOCKER_ROCM_VERSION}
DOCKER_HIP_LIBRARIES_CUDA_TAG: cuda-ubuntu-${DOCKER_ROCM_VERSION}
DOCKER_HIP_LIBRARIES_ROCM: $DOCKER_TAG_PREFIX:$DOCKER_HIP_LIBRARIES_ROCM_TAG

2
Common/example_utils.hpp

@@ -1,6 +1,6 @@
// MIT License
//
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal

14
Dockerfiles/hip-libraries-cuda-ubuntu.Dockerfile

@@ -2,11 +2,11 @@
# Above is required for substitutions in environment variables
# CUDA based docker image
FROM nvidia/cuda:12.2.0-devel-ubuntu22.04
FROM nvidia/cuda:12.6.0-devel-ubuntu22.04
# The ROCm version that this image is based on.
# Always write this down as major.minor.patch
ENV ROCM_VERSION=6.1.0
ENV ROCM_VERSION=6.2.0
ENV ROCM_VERSION_APT=${ROCM_VERSION%.0}
# Base packages that are required for the installation
@@ -53,10 +53,14 @@ RUN echo "/opt/rocm/lib" >> /etc/ld.so.conf.d/rocm.conf \
ENV HIP_COMPILER=nvcc HIP_PLATFORM=nvidia HIP_RUNTIME=cuda
# Install rocRAND
# We need to apply this patch to make it work on Nvidia for ROCm 6.2: https://github.com/ROCm/rocRAND/commit/7ec5fda5243e599d83af841b5c38198a2f7f05fa
RUN wget https://github.com/ROCm/rocRAND/archive/refs/tags/rocm-${ROCM_VERSION}.tar.gz -O rocrand.tar.gz \
&& mkdir rocrand \
&& tar -xf ./rocrand.tar.gz --strip-components 1 -C rocrand \
&& rm ./rocrand.tar.gz \
&& wget https://github.com/ROCm/rocRAND/commit/7ec5fda5243e599d83af841b5c38198a2f7f05fa.patch -O rocrand.patch \
&& patch -p1 -d rocrand < ./rocrand.patch \
&& rm rocrand.patch \
&& cmake -S ./rocrand -B ./rocrand/build \
-D CMAKE_MODULE_PATH=/opt/rocm/lib/cmake/hip \
-D BUILD_HIPRAND=OFF \
@@ -89,12 +93,11 @@ RUN wget https://github.com/ROCm/hipBLAS/archive/refs/tags/rocm-${ROCM_VERSION}.
&& rm -rf ./hipblas
# Install hipSOLVER
# The hipSOLVER CMake configuration for rocm-6.1.0 is broken; CXXFLAGS=-D__HIP_PLATFORM_NVIDIA__ is added as a workaround.
RUN wget https://github.com/ROCm/hipSOLVER/archive/refs/tags/rocm-${ROCM_VERSION}.tar.gz -O hipsolver.tar.gz \
&& mkdir hipsolver \
&& tar -xf ./hipsolver.tar.gz --strip-components 1 -C hipsolver \
&& rm ./hipsolver.tar.gz \
&& CXXFLAGS=-D__HIP_PLATFORM_NVIDIA__ cmake -S ./hipsolver -B ./hipsolver/build \
&& cmake -S ./hipsolver -B ./hipsolver/build \
-D CMAKE_MODULE_PATH=/opt/rocm/lib/cmake/hip \
-D CMAKE_INSTALL_PREFIX=/opt/rocm \
-D USE_CUDA=ON \
@@ -102,13 +105,10 @@ RUN wget https://github.com/ROCm/hipSOLVER/archive/refs/tags/rocm-${ROCM_VERSION
&& rm -rf ./hipsolver
# Install hipRAND
# Manually replace usage of __HIP_PLATFORM_NVCC__ with __HIP_PLATFORM_NVIDIA__. See
# https://github.com/ROCm/hipRAND/commit/4925f0da96fad5b9f532ddc79f1f52fc279d329f
RUN wget https://github.com/ROCm/hipRAND/archive/refs/tags/rocm-${ROCM_VERSION}.tar.gz -O hiprand.tar.gz \
&& mkdir hiprand \
&& tar -xf ./hiprand.tar.gz --strip-components 1 -C hiprand \
&& rm ./hiprand.tar.gz \
&& sed -i s/__HIP_PLATFORM_NVCC__/__HIP_PLATFORM_NVIDIA__/ ./hiprand/library/include/hiprand/hiprand.h \
&& cmake -S ./hiprand -B ./hiprand/build \
-D CMAKE_MODULE_PATH=/opt/rocm/lib/cmake/hip \
-D CMAKE_INSTALL_PREFIX=/opt/rocm \

2
Dockerfiles/hip-libraries-rocm-ubuntu.Dockerfile

@@ -6,7 +6,7 @@ FROM ubuntu:22.04
# The ROCm version that this image is based on.
# Always write this down as major.minor.patch
ENV ROCM_VERSION=6.1.0
ENV ROCM_VERSION=6.2.0
ENV ROCM_VERSION_APT=${ROCM_VERSION%.0}
# Base packages that are required for the installation

4
Libraries/hipFFT/plan_z2z/main.cpp

@@ -56,9 +56,7 @@ void fft_example(const int dimension, const int size = 4, const int direction =
std::uniform_real_distribution<double> distribution{};
std::generate(input.begin(),
input.end(),
[&]() {
return input_t{distribution(generator), distribution(generator)};
});
[&]() { return input_t{distribution(generator), distribution(generator)}; });
std::cout << "Input:\n" << std::setprecision(3);
print_nd_data(input, n, 16);

17
Libraries/rocFFT/multi_gpu/main.cpp

@@ -91,13 +91,16 @@ int main(int argc, char* argv[])
// Define infield geometry
// Last entry of the upper bound is the batch size
const size_t batch_size = 1;
const std::vector<size_t> inbrick0_lower = {0, 0, 0, 0};
const std::vector<size_t> inbrick0_upper = {1, length[0] / deviceCount, length[1], length[2]};
const std::vector<size_t> inbrick1_lower = {0, length[0] / deviceCount, 0, 0};
const std::vector<size_t> inbrick1_upper = {1, length[0], length[1], length[2]};
const std::vector<size_t> inbrick0_upper
= {length[0] / deviceCount, length[1], length[2], batch_size};
const std::vector<size_t> inbrick1_lower = {length[0] / deviceCount, 0, 0, 0};
const std::vector<size_t> inbrick1_upper = {length[0], length[1], length[2], batch_size};
// Row-major stride for brick data layout in memory
std::vector<size_t> brick_stride = {fftSize, length[0] * length[1], length[0], 1};
const size_t idist = fftSize; // distance between batches
std::vector<size_t> brick_stride = {1, length[0] * length[1], length[0], idist};
rocfft_field infield = nullptr;
ROCFFT_CHECK(rocfft_field_create(&infield));
@@ -145,9 +148,9 @@ int main(int argc, char* argv[])
std::vector<void*> gpu_out(2);
const std::vector<size_t> outbrick0_lower = {0, 0, 0, 0};
const std::vector<size_t> outbrick0_upper = {1, length[0] / deviceCount, length[1], length[2]};
const std::vector<size_t> outbrick1_lower = {0, length[0] / deviceCount, 0, 0};
const std::vector<size_t> outbrick1_upper = {1, length[0], length[1], length[2]};
const std::vector<size_t> outbrick0_upper = {length[0] / deviceCount, length[1], length[2], 1};
const std::vector<size_t> outbrick1_lower = {length[0] / deviceCount, 0, 0, 0};
const std::vector<size_t> outbrick1_upper = {length[0], length[1], length[2], 1};
rocfft_brick outbrick0 = nullptr;
ROCFFT_CHECK(rocfft_brick_create(&outbrick0,

5
Libraries/rocRAND/simple_distributions_cpp/main.cpp

@@ -1,6 +1,6 @@
// MIT License
//
// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
@@ -253,8 +253,7 @@ int main(const int argc, const char** argv)
if(number_of_devies <= 0)
{
std::cerr << "HIP supported devices not found!"
<< "\n";
std::cerr << "HIP supported devices not found!\n";
exit(error_exit_code);
}

Loading…
Cancel
Save