You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
221 lines
7.3 KiB
221 lines
7.3 KiB
# MIT License
#
# Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# Name shared by the project, the executable target and the test.
set(example_name hip_llvm_ir_to_executable)

cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
project(${example_name} LANGUAGES CXX)

set(GPU_RUNTIME "HIP" CACHE STRING "Switches between HIP and CUDA")

# Only supported on HIP (not CUDA)
if(NOT "${GPU_RUNTIME}" STREQUAL "HIP")
    # The message is passed as a single quoted argument so that the value the
    # user supplied is echoed back verbatim, even if it contains ';'.
    message(
        FATAL_ERROR
        "GPU_RUNTIME is set to \"${GPU_RUNTIME}\".\nGPU_RUNTIME must be HIP."
    )
endif()

# Enable the selected GPU language and require C++17 without compiler
# extensions for it.
enable_language(${GPU_RUNTIME})
set(CMAKE_${GPU_RUNTIME}_STANDARD 17)
set(CMAKE_${GPU_RUNTIME}_EXTENSIONS OFF)
set(CMAKE_${GPU_RUNTIME}_STANDARD_REQUIRED ON)

# Choose the platform default for the ROCm root: the HIP_PATH environment
# variable (read at configure time) on Windows, /opt/rocm elsewhere.
if(WIN32)
    set(rocm_root_default "$ENV{HIP_PATH}")
else()
    set(rocm_root_default "/opt/rocm")
endif()

# Cache entry, so a value already present in the cache (e.g. -DROCM_ROOT=...)
# takes precedence over the default computed above.
set(ROCM_ROOT
    "${rocm_root_default}"
    CACHE PATH
    "Root directory of the ROCm installation"
)

# Add the ROCm root to the prefixes searched by the find_* commands.
list(APPEND CMAKE_PREFIX_PATH "${ROCM_ROOT}")

# Seed GPU_ARCHITECTURES from CMAKE_HIP_ARCHITECTURES when that variable is
# defined; otherwise start from the placeholder value "all".
if(DEFINED CMAKE_HIP_ARCHITECTURES)
    set(gpu_architectures_default "${CMAKE_HIP_ARCHITECTURES}")
else()
    set(gpu_architectures_default "all")
endif()
set(GPU_ARCHITECTURES
    "${gpu_architectures_default}"
    CACHE STRING
    "GPU architectures to compile for"
)

# Replace the placeholder "all" with an explicit architecture list. FORCE is
# used so that the cached "all" is overwritten by the expanded list.
if("${GPU_ARCHITECTURES}" STREQUAL "all")
    set(GPU_ARCHITECTURES
        "gfx803;gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100;gfx1101;gfx1102"
        CACHE STRING
        "GPU architectures to compile for"
        FORCE
    )
endif()

# Each architecture gets its own set of build rules, so drop repeated entries.
list(REMOVE_DUPLICATES GPU_ARCHITECTURES)
message(STATUS "GPU_ARCHITECTURES: ${GPU_ARCHITECTURES}")

# Compile main.hip with --cuda-host-only when it is built as part of the
# executable target; the device side is produced by separate custom commands.
set_property(SOURCE main.hip PROPERTY COMPILE_OPTIONS "--cuda-host-only")

# Locate llvm-dis, which turns the emitted LLVM bitcode into textual LLVM IR.
# The lookup fails the configure step if the tool cannot be found.
find_program(
    LLVM_DIS_COMMAND
    NAMES llvm-dis
    PATHS ${ROCM_ROOT}/llvm ${CMAKE_INSTALL_PREFIX}/llvm
    PATH_SUFFIXES bin
    REQUIRED
)

# For every requested GPU architecture, emit the device-side LLVM bitcode of
# main.hip with the HIP compiler and disassemble it to textual LLVM IR.
# Both files are named relative to the current binary directory.
foreach(gpu_arch ${GPU_ARCHITECTURES})
    set(arch_bitcode main_${gpu_arch}.bc)
    set(arch_llvm_ir main_${gpu_arch}.ll)

    add_custom_command(
        OUTPUT ${arch_llvm_ir} ${arch_bitcode}
        # Step 1: device-only compilation of main.hip to LLVM bitcode.
        COMMAND
            ${CMAKE_HIP_COMPILER} --cuda-device-only -c -emit-llvm
            ${CMAKE_CURRENT_SOURCE_DIR}/main.hip
            --offload-arch=${gpu_arch} -o ${arch_bitcode} -I
            ${CMAKE_CURRENT_SOURCE_DIR}/../../Common -std=c++17
        # Step 2: bitcode (.bc) to human-readable LLVM IR (.ll).
        COMMAND ${LLVM_DIS_COMMAND} ${arch_bitcode} -o ${arch_llvm_ir}
        DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/main.hip
        COMMENT "Generating ${arch_llvm_ir}"
        VERBATIM
    )
endforeach()

# Host-platform specific settings used by the rules that follow:
#   OBJ_TYPE         - object file extension
#   NULDEV           - the platform's null device
#   HOST_TARGET      - host target triple
#   HIP_OBJ_GEN_FILE - assembler template that is fed to llvm-mc
# Start from the non-Windows values and override them on Windows.
set(OBJ_TYPE o)
set(NULDEV /dev/null)
set(HOST_TARGET x86_64-unknown-linux)
set(HIP_OBJ_GEN_FILE hip_obj_gen.mcin)
if(WIN32)
    set(OBJ_TYPE obj)
    set(NULDEV NUL)
    set(HOST_TARGET x86_64-pc-windows-msvc)
    set(HIP_OBJ_GEN_FILE hip_obj_gen_win.mcin)
endif()

# Compile the LLVM IR of every architecture into a device object file using
# the HIP compiler. The compiler needs -target amdgcn-amd-amdhsa -mcpu=gfx*
# in order to generate the object file for the right GPU.
foreach(gpu_arch ${GPU_ARCHITECTURES})
    set(arch_llvm_ir main_${gpu_arch}.ll)
    set(arch_object main_${gpu_arch}.${OBJ_TYPE})

    add_custom_command(
        OUTPUT ${arch_object}
        COMMAND
            ${CMAKE_HIP_COMPILER} -fPIC -target amdgcn-amd-amdhsa
            -mcpu=${gpu_arch} ${arch_llvm_ir} -o
            ${CMAKE_CURRENT_BINARY_DIR}/${arch_object}
        # The .ll file is an output of another custom command, so this
        # dependency chains the two rules together.
        DEPENDS ${arch_llvm_ir}
        COMMENT "Generating ${arch_object}"
        VERBATIM
    )
endforeach()

# The per-architecture device objects are combined into one offload bundle.
# This needs the clang-offload-bundler tool; configuration fails if it is
# not found.
find_program(
    OFFLOAD_BUNDLER_COMMAND
    NAMES clang-offload-bundler
    PATHS ${ROCM_ROOT}/llvm ${CMAKE_INSTALL_PREFIX}/llvm
    PATH_SUFFIXES bin
    REQUIRED
)

# Build the argument lists for the object bundle.
# The invocation to generate is
#   clang-offload-bundler -targets=<targets> -input=<input target #1> -input=<input target #2> ... -output=<output>
# Note that the host target must be the first target present here, and it
# should have an empty input associated to it.

# BUNDLE_TARGETS is one comma-separated argument:
#   -targets=host-${HOST_TARGET},hipv4-amdgcn-amd-amdhsa--<arch>,...
set(BUNDLE_TARGETS "-targets=host-${HOST_TARGET}")
# BUNDLE_INPUTS is a list with one -input=<file> entry per target; the host
# entry points at the null device.
set(BUNDLE_INPUTS "-input=${NULDEV}")
# BUNDLE_OBJECTS is the list of device object files.
set(BUNDLE_OBJECTS "")

foreach(gpu_arch ${GPU_ARCHITECTURES})
    set(arch_object_path
        "${CMAKE_CURRENT_BINARY_DIR}/main_${gpu_arch}.${OBJ_TYPE}"
    )
    string(APPEND BUNDLE_TARGETS ",hipv4-amdgcn-amd-amdhsa--${gpu_arch}")
    list(APPEND BUNDLE_INPUTS "-input=${arch_object_path}")
    list(APPEND BUNDLE_OBJECTS "${arch_object_path}")
endforeach()

# Invoke clang-offload-bundler to generate an offload bundle.
# BUNDLE_TARGETS is passed as a single argument, while BUNDLE_INPUTS is
# expanded unquoted so that every -input=<file> entry becomes its own argument.
set(BUNDLE "${CMAKE_CURRENT_BINARY_DIR}/offload_bundle.hipfb")
add_custom_command(
    OUTPUT "${BUNDLE}"
    COMMAND
        "${OFFLOAD_BUNDLER_COMMAND}" -type=o -bundle-align=4096
        "${BUNDLE_TARGETS}" ${BUNDLE_INPUTS} "-output=${BUNDLE}"
    # Re-bundle whenever any of the device objects is regenerated.
    DEPENDS ${BUNDLE_OBJECTS}
    # Progress message, matching the other generation steps in this file.
    COMMENT "Generating offload_bundle.hipfb"
    VERBATIM
)

# The device binary is created by assembling a template that includes the
# generated offload bundle through an .incbin directive.
# This needs an assembler: locate llvm-mc and fail the configure step if it
# is missing.
find_program(
    LLVM_MC_COMMAND
    NAMES llvm-mc
    PATHS ${ROCM_ROOT}/llvm ${CMAKE_INSTALL_PREFIX}/llvm
    PATH_SUFFIXES bin
    REQUIRED
)

# Invoke llvm-mc to generate an object file containing the offload bundle.
# The assembler template (HIP_OBJ_GEN_FILE) lives in the source directory and
# is assembled for the host target.
# NOTE(review): the template is presumably what pulls in the bundle file;
# its contents are not visible here - confirm the path it expects.
set(DEVICE_OBJECT "${CMAKE_CURRENT_BINARY_DIR}/main_device.${OBJ_TYPE}")
set(device_object_template "${CMAKE_CURRENT_SOURCE_DIR}/${HIP_OBJ_GEN_FILE}")
add_custom_command(
    OUTPUT "${DEVICE_OBJECT}"
    COMMAND
        "${LLVM_MC_COMMAND}" -triple "${HOST_TARGET}"
        "${device_object_template}" -o "${DEVICE_OBJECT}"
        --filetype=obj
    # The template is an input of this rule as well: listing it makes the
    # object rebuild when the template is edited, not only when the bundle
    # changes.
    DEPENDS "${BUNDLE}" "${device_object_template}"
    COMMENT "Generating main_device.${OBJ_TYPE}"
    VERBATIM
)

# Finally, create the executable from main.hip and the generated object file
# that carries the device code.
add_executable(${example_name} main.hip ${DEVICE_OBJECT})

# Make example runnable using ctest.
# The NAME/COMMAND signature is used because it resolves the target name to
# the location of the built executable; the legacy
# add_test(<name> <command>) form does not support target names.
# NOTE(review): no enable_testing() call is visible in this file - confirm
# that a parent project enables testing.
add_test(NAME ${example_name} COMMAND ${example_name})

# Relative include directories are interpreted relative to the current
# source directory.
set(include_dirs "../../Common")
target_include_directories(${example_name} PRIVATE ${include_dirs})

# Have main.hip compiled with the GPU language selected by GPU_RUNTIME.
set_source_files_properties(main.hip PROPERTIES LANGUAGE ${GPU_RUNTIME})

install(TARGETS ${example_name})