You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
174 lines
6.5 KiB
174 lines
6.5 KiB
# MIT License
#
# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
|
|
|
# Minimum CMake version required to configure this example.
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)

# The example name is used as the project name and as the name of the
# executable target.
set(example_name hip_llvm_ir_to_executable)
project(${example_name} LANGUAGES CXX)
|
|
|
# GPU_RUNTIME names the GPU language that is enabled for this example.
set(GPU_RUNTIME "HIP" CACHE STRING "Switches between HIP and CUDA")

# This example is only supported on HIP (not CUDA): reject any other runtime.
if(NOT "${GPU_RUNTIME}" STREQUAL "HIP")
    # The text is passed as one quoted argument so that it is printed verbatim
    # even when GPU_RUNTIME contains list separators (';').
    message(
        FATAL_ERROR
        "GPU_RUNTIME is set to \"${GPU_RUNTIME}\".\nGPU_RUNTIME must be HIP."
    )
endif()
|
|
|
# Enable the selected GPU language and require language standard 17 for it,
# without compiler-specific extensions.
enable_language(${GPU_RUNTIME})
set(CMAKE_${GPU_RUNTIME}_STANDARD 17)
set(CMAKE_${GPU_RUNTIME}_STANDARD_REQUIRED ON)
set(CMAKE_${GPU_RUNTIME}_EXTENSIONS OFF)
|
|
|
# Location of the ROCm installation; can be overridden with -DROCM_ROOT=<path>.
set(ROCM_ROOT
    "/opt/rocm"
    CACHE PATH "Root directory of the ROCm installation"
)

# Use the ROCm root as the search prefix unless a prefix path was already given.
if(NOT CMAKE_PREFIX_PATH)
    set(CMAKE_PREFIX_PATH "${ROCM_ROOT}")
endif()
|
|
|
# GPU_ARCHITECTURES defaults to CMAKE_HIP_ARCHITECTURES when that variable is
# defined, and to the placeholder "all" otherwise.
if(DEFINED CMAKE_HIP_ARCHITECTURES)
    set(GPU_ARCHITECTURES
        "${CMAKE_HIP_ARCHITECTURES}"
        CACHE STRING "GPU architectures to compile for"
    )
else()
    set(GPU_ARCHITECTURES
        "all"
        CACHE STRING "GPU architectures to compile for"
    )
endif()

# Expand the "all" placeholder into the explicit list of architectures. FORCE is
# needed here to overwrite the placeholder already stored in the cache.
if("${GPU_ARCHITECTURES}" STREQUAL "all")
    set(GPU_ARCHITECTURES
        "gfx803;gfx900;gfx906;gfx908;gfx90a;gfx1030"
        CACHE STRING "GPU architectures to compile for"
        FORCE
    )
endif()

# Each architecture must be listed only once.
list(REMOVE_DUPLICATES GPU_ARCHITECTURES)
message(STATUS "GPU_ARCHITECTURES: ${GPU_ARCHITECTURES}")
|
|
|
# Compile main.hip with --cuda-host-only. The device code is not taken from this
# source file: it is added to the executable as a separately generated object.
set_source_files_properties(
    main.hip
    PROPERTIES
        COMPILE_OPTIONS "--cuda-host-only"
)
|
|
|
# Platform-dependent settings used by the custom build steps:
#   OBJ_TYPE         - file extension of object files
#   NULDEV           - path of the null device
#   HOST_TARGET      - host target triple
#   HIP_OBJ_GEN_FILE - name of the template file passed to llvm-mc
# The non-Windows values are the defaults; Windows overrides all of them.
set(OBJ_TYPE o)
set(NULDEV /dev/null)
set(HOST_TARGET x86_64-unknown-linux)
set(HIP_OBJ_GEN_FILE hip_obj_gen.mcin)

if(WIN32)
    set(OBJ_TYPE obj)
    set(NULDEV NUL)
    set(HOST_TARGET x86_64-pc-windows-msvc)
    set(HIP_OBJ_GEN_FILE hip_obj_gen_win.mcin)
endif()
|
|
|
# Turn the per-architecture LLVM IR files (main_<arch>.ll) into device object
# files using the HIP compiler. The compiler needs
# -target amdgcn-amd-amdhsa -mcpu=gfx* in order to generate the object file for
# the right GPU.
foreach(gpu_arch ${GPU_ARCHITECTURES})
    # Printed at configure time; the object itself is produced at build time.
    message(STATUS "Generating main_${gpu_arch}.${OBJ_TYPE}")
    add_custom_command(
        OUTPUT main_${gpu_arch}.${OBJ_TYPE}
        COMMAND
            ${CMAKE_HIP_COMPILER}
            -fPIC
            -target amdgcn-amd-amdhsa
            -mcpu=${gpu_arch}
            ${CMAKE_CURRENT_SOURCE_DIR}/main_${gpu_arch}.ll
            -o ${CMAKE_CURRENT_BINARY_DIR}/main_${gpu_arch}.${OBJ_TYPE}
        DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/main_${gpu_arch}.ll
        VERBATIM
    )
endforeach()
|
|
|
# Create an offload-bundle from the device object files. This needs the
# clang-offload-bundler tool, which is also searched for in the llvm/bin
# directories below ROCM_ROOT and CMAKE_INSTALL_PREFIX.
# REQUIRED makes the configuration stop with an error when the tool cannot be
# found, so no separate not-found check is needed afterwards.
find_program(
    OFFLOAD_BUNDLER_COMMAND clang-offload-bundler
    PATH_SUFFIXES bin
    PATHS
        ${ROCM_ROOT}/llvm
        ${CMAKE_INSTALL_PREFIX}/llvm
    REQUIRED
)

# Report the location stored in the variable that find_program actually set.
message(STATUS "clang-offload-bundler found: ${OFFLOAD_BUNDLER_COMMAND}")
|
|
|
# Collect the arguments for clang-offload-bundler. The invocation has the form
#   clang-offload-bundler -targets=<targets> -input=<input #1> -input=<input #2> ... -output=<output>
# The host target must be the first target, and it is paired with an empty
# input (the null device).

# BUNDLE_TARGETS is a single string:
#   -targets=host-${HOST_TARGET},hipv4-amdgcn-amd-amdhsa--<arch>,...
set(BUNDLE_TARGETS "-targets=host-${HOST_TARGET}")
# BUNDLE_INPUTS is a list with one -input=<file> entry per target, starting
# with the empty host input.
set(BUNDLE_INPUTS "-input=${NULDEV}")
# BUNDLE_OBJECTS is the list of device object files that go into the bundle.
set(BUNDLE_OBJECTS "")

foreach(gpu_arch ${GPU_ARCHITECTURES})
    string(APPEND BUNDLE_TARGETS ",hipv4-amdgcn-amd-amdhsa--${gpu_arch}")
    list(APPEND BUNDLE_INPUTS "-input=${CMAKE_CURRENT_BINARY_DIR}/main_${gpu_arch}.${OBJ_TYPE}")
    list(APPEND BUNDLE_OBJECTS "${CMAKE_CURRENT_BINARY_DIR}/main_${gpu_arch}.${OBJ_TYPE}")
endforeach()
|
|
|
# Build rule for the offload bundle: run clang-offload-bundler on the device
# object files. BUNDLE_TARGETS is passed as one argument, while BUNDLE_INPUTS
# expands to one argument per input file.
set(BUNDLE "${CMAKE_CURRENT_BINARY_DIR}/offload_bundle.hipfb")
add_custom_command(
    OUTPUT "${BUNDLE}"
    COMMAND
        "${OFFLOAD_BUNDLER_COMMAND}" -type=o -bundle-align=4096
        "${BUNDLE_TARGETS}" ${BUNDLE_INPUTS} "-output=${BUNDLE}"
    DEPENDS ${BUNDLE_OBJECTS}
    VERBATIM
)
|
|
|
# The device binary is created by assembling a template that pulls in the
# generated offload bundle with an .incbin directive. That step needs an
# assembler: llvm-mc, which is also searched for in the llvm/bin directories
# below ROCM_ROOT and CMAKE_INSTALL_PREFIX.
find_program(
    LLVM_MC_COMMAND llvm-mc
    PATH_SUFFIXES bin
    PATHS
        ${ROCM_ROOT}/llvm
        ${CMAKE_INSTALL_PREFIX}/llvm
)

# Stop the configuration when the assembler is missing.
if(NOT LLVM_MC_COMMAND)
    message(FATAL_ERROR "llvm-mc not found")
endif()
message(STATUS "llvm-mc found: ${LLVM_MC_COMMAND}")
|
|
|
# Invoke llvm-mc to generate a host object file containing the offload bundle.
# The platform-specific template (HIP_OBJ_GEN_FILE) is assembled for the host
# target triple.
set(DEVICE_OBJECT "${CMAKE_CURRENT_BINARY_DIR}/main_device.${OBJ_TYPE}")
add_custom_command(
    OUTPUT "${DEVICE_OBJECT}"
    COMMAND
        "${LLVM_MC_COMMAND}"
        -triple "${HOST_TARGET}"
        "${CMAKE_CURRENT_SOURCE_DIR}/${HIP_OBJ_GEN_FILE}"
        -o "${DEVICE_OBJECT}"
        --filetype=obj
    # Both inputs are listed so the object is regenerated when either the
    # offload bundle or the assembler template changes.
    DEPENDS
        "${BUNDLE}"
        "${CMAKE_CURRENT_SOURCE_DIR}/${HIP_OBJ_GEN_FILE}"
    VERBATIM
)
|
|
|
# Finally, create the executable from the host source file and the generated
# object file that contains the device code.
add_executable(
    ${example_name}
    main.hip
    ${DEVICE_OBJECT}
)

# Make the example runnable using ctest. The NAME/COMMAND signature is used
# because it resolves the target name to the location of the built executable;
# the legacy add_test(<name> <command>) signature does not support target names.
# NOTE(review): no enable_testing()/include(CTest) call is visible in this file;
# presumably a parent project enables testing - confirm.
add_test(NAME ${example_name} COMMAND ${example_name})

# Directory with the headers shared between the examples.
set(include_dirs "../../Common")
target_include_directories(${example_name} PRIVATE ${include_dirs})

# Compile main.hip with the selected GPU language.
set_source_files_properties(main.hip PROPERTIES LANGUAGE ${GPU_RUNTIME})
|
|
|