Browse Source
* add cmake build instructions for windows * Add bit extract example * Resolve "Multi GPU data transfer example" * Resolve "Vulkan interop example" * fix bit extract typo * Resolve "OpenGL interop example" * Add module API example * enable -Wall -Wextra -Werror in cmake in ci * Resolve "CMake don't throw error when building examples and certain libraries are missing." * Device Globals example * add moving average example * Resolve "Static library example" * fix missing opengl cmake check * Resolve "Inline assembly / GPU arch example" * revert to old msvc project file structure * add hip basic texture management example * normalize line endings * remove hiprtc from bitextract * Resolve "Cooperative groups example" * Fix GUIDs * Resolve "Floyd-Warshall example" Co-authored-by: Nol Moonen <nol@streamhpc.com> Co-authored-by: Beatriz Navidad Vilches <beatriz@streamhpc.com> Co-authored-by: Robin Voetter <robin@streamhpc.com> Co-authored-by: Vince van Heertum <vince@streamhpc.com>pull/16/head
242 changed files with 19021 additions and 1983 deletions
@ -1 +1,4 @@
@@ -1 +1,4 @@
|
||||
*.hip gitlab-language=cuda linguist-language=Cuda |
||||
*.sln text eol=crlf |
||||
*.vcxproj text eol=crlf |
||||
*.vcxproj.filters text eol=crlf |
||||
|
@ -0,0 +1,26 @@
@@ -0,0 +1,26 @@
|
||||
# MIT License |
||||
# |
||||
# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. |
||||
# |
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
# of this software and associated documentation files (the "Software"), to deal |
||||
# in the Software without restriction, including without limitation the rights |
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||
# copies of the Software, and to permit persons to whom the Software is |
||||
# furnished to do so, subject to the following conditions: |
||||
# |
||||
# The above copyright notice and this permission notice shall be included in all |
||||
# copies or substantial portions of the Software. |
||||
# |
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
||||
# SOFTWARE. |
||||
|
||||
# Aggregate project that builds every example in the Applications directory.
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
project(Applications LANGUAGES CXX)

# The example subdirectories register their executables with add_test(). CMake
# only generates those tests when enable_testing() has been called, and ctest
# expects the test files to start at the top-level build directory, so enable
# testing here before descending into the examples.
enable_testing()

add_subdirectory(floyd_warshall)
@ -0,0 +1,34 @@
@@ -0,0 +1,34 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
# Sub-directories that each contain one example with its own Makefile.
EXAMPLES := \
|
||||
floyd_warshall |
||||
|
||||
# Default goal: build every example listed in EXAMPLES.
all: $(EXAMPLES) |
||||
|
||||
# `clean` sets the target-specific variable TARGET to `clean` and then depends
# on `all`. Target-specific variables are also in effect for the prerequisites
# of the target, so the per-example rule below passes `clean` to each sub-make.
clean: TARGET=clean |
||||
clean: all |
||||
|
||||
# Run make inside the example's directory ($@). $(TARGET) is empty for a normal
# build, so the sub-make builds its default goal.
$(EXAMPLES): |
||||
$(MAKE) -C $@ $(TARGET) |
||||
|
||||
# The example names are directories, not files produced by these rules; marking
# them phony makes the sub-make run on every invocation.
.PHONY: all clean $(EXAMPLES)
@ -0,0 +1,43 @@
@@ -0,0 +1,43 @@
|
||||
# Applications Examples |
||||
|
||||
## Summary |
||||
The examples in this subdirectory showcase several GPU-implementations of finance, computer science, physics, etc. models or algorithms that additionally offer a command line application. The examples are built on Linux for the ROCm (AMD GPU) backend. Some examples additionally support the CUDA (NVIDIA GPU) backend. |
||||
|
||||
## Prerequisites |
||||
### Linux |
||||
- [CMake](https://cmake.org/download/) (at least version 3.21) |
||||
- OR GNU Make - available via the distribution's package manager |
||||
- [ROCm](https://docs.amd.com/bundle/ROCm-Installation-Guide-v5.1.3/page/Overview_of_ROCm_Installation_Methods.html) (at least version 5.x.x) |
||||
|
||||
### Windows |
||||
- [Visual Studio](https://visualstudio.microsoft.com/) 2019 or 2022 with the "Desktop Development with C++" workload |
||||
- ROCm toolchain for Windows (No public release yet) |
||||
- The Visual Studio ROCm extension needs to be installed to build with the solution files. |
||||
- [CMake](https://cmake.org/download/) (optional, to build with CMake. Requires at least version 3.21) |
||||
- [Ninja](https://ninja-build.org/) (optional, to build with CMake) |
||||
|
||||
## Building |
||||
### Linux |
||||
Make sure that the dependencies are installed, or use one of the [provided Dockerfiles](../../Dockerfiles/) to build and run the examples in a containerized environment. |
||||
|
||||
#### Using CMake |
||||
All examples in the `Applications` subdirectory can either be built by a single CMake project or be built independently. |
||||
|
||||
- `$ cd Libraries/Applications` |
||||
- `$ cmake -S . -B build` (on ROCm) or `$ cmake -S . -B build -D GPU_RUNTIME=CUDA` (on CUDA, when supported) |
||||
- `$ cmake --build build` |
||||
|
||||
#### Using Make |
||||
All examples can be built by a single invocation to Make or be built independently. |
||||
|
||||
- `$ cd Libraries/Applications` |
||||
- `$ make` (on ROCm) or `$ make GPU_RUNTIME=CUDA` (on CUDA, when supported) |
||||
|
||||
### Windows |
||||
#### Visual Studio |
||||
Visual Studio solution files are available for the individual examples. To build all supported HIP runtime examples open the top level solution file [ROCm-Examples-VS2019.sln](../../ROCm-Examples-VS2019.sln) and filter for Applications. |
||||
|
||||
For more detailed build instructions refer to the top level [README.md](../../README.md#visual-studio). |
||||
|
||||
#### CMake |
||||
All examples in the `Applications` subdirectory can either be built by a single CMake project or be built independently. For build instructions refer to the top-level [README.md](../../README.md#cmake-2). |
@ -0,0 +1 @@
@@ -0,0 +1 @@
|
||||
applications_floyd_warshall |
@ -0,0 +1,58 @@
@@ -0,0 +1,58 @@
|
||||
# MIT License |
||||
# |
||||
# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. |
||||
# |
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
# of this software and associated documentation files (the "Software"), to deal |
||||
# in the Software without restriction, including without limitation the rights |
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||
# copies of the Software, and to permit persons to whom the Software is |
||||
# furnished to do so, subject to the following conditions: |
||||
# |
||||
# The above copyright notice and this permission notice shall be included in all |
||||
# copies or substantial portions of the Software. |
||||
# |
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
||||
# SOFTWARE. |
||||
|
||||
# cmake_minimum_required() goes first so that policy defaults are established
# before any other command runs.
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)

# Name shared by the project, the executable target and the CTest test.
set(example_name applications_floyd_warshall)
project(${example_name} LANGUAGES CXX)

# GPU backend used to compile main.hip. The STRINGS property makes cmake-gui and
# ccmake offer the supported values as a drop-down list.
set(GPU_RUNTIME "HIP" CACHE STRING "Switches between HIP and CUDA")
set(GPU_RUNTIMES "HIP" "CUDA")
set_property(CACHE GPU_RUNTIME PROPERTY STRINGS ${GPU_RUNTIMES})

# Reject unsupported backends early with a readable message. The message is
# quoted so that it always reaches message() as a single argument.
if(NOT "${GPU_RUNTIME}" IN_LIST GPU_RUNTIMES)
    set(ERROR_MESSAGE "GPU_RUNTIME is set to \"${GPU_RUNTIME}\".\nGPU_RUNTIME must be either HIP or CUDA.")
    message(FATAL_ERROR "${ERROR_MESSAGE}")
endif()

# Enable the selected GPU language and require C++17 without compiler extensions.
enable_language(${GPU_RUNTIME})
set(CMAKE_${GPU_RUNTIME}_STANDARD 17)
set(CMAKE_${GPU_RUNTIME}_EXTENSIONS OFF)
set(CMAKE_${GPU_RUNTIME}_STANDARD_REQUIRED ON)

# Search the ROCm installation for packages unless the user supplied a prefix path.
set(ROCM_ROOT "/opt/rocm" CACHE PATH "Root directory of the ROCm installation")
if(NOT CMAKE_PREFIX_PATH)
    set(CMAKE_PREFIX_PATH "${ROCM_ROOT}")
endif()

add_executable(${example_name} main.hip)

# Make example runnable using ctest. enable_testing() is required for the test
# to be generated when this directory is configured on its own. The
# NAME/COMMAND signature is used because only that form resolves a target name
# to the location of the built executable.
enable_testing()
add_test(NAME ${example_name} COMMAND ${example_name})

# Headers shared by all examples, anchored to this directory explicitly.
set(include_dirs "${CMAKE_CURRENT_SOURCE_DIR}/../../Common")
# For examples targeting NVIDIA, include the HIP header directory.
if("${GPU_RUNTIME}" STREQUAL "CUDA")
    list(APPEND include_dirs "${ROCM_ROOT}/include")
endif()

target_include_directories(${example_name} PRIVATE ${include_dirs})

# Compile main.hip with the selected GPU language.
set_source_files_properties(main.hip PROPERTIES LANGUAGE ${GPU_RUNTIME})
@ -0,0 +1,60 @@
@@ -0,0 +1,60 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
# Name of the executable produced by this Makefile.
EXAMPLE := applications_floyd_warshall |
||||
# Directory holding the headers listed as prerequisites of $(EXAMPLE) below.
COMMON_INCLUDE_DIR := ../../Common |
||||
# Backend to build for; checked against CUDA and HIP further down.
GPU_RUNTIME := HIP |
||||
|
||||
# HIP variables: ROCm installation prefix and the include directory derived from it.
|
||||
ROCM_INSTALL_DIR := /opt/rocm |
||||
HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include |
||||
|
||||
# Compiler driver; `?=` only assigns when HIPCXX is not already defined.
HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc |
||||
|
||||
# Common variables and flags. The I-prefixed variables collect the flags set by
# this Makefile; CXXFLAGS, CPPFLAGS, LDFLAGS and LDLIBS are appended to them below.
|
||||
CXX_STD := c++17 |
||||
ICXXFLAGS := -std=$(CXX_STD) |
||||
ICPPFLAGS := -I $(COMMON_INCLUDE_DIR) |
||||
ILDFLAGS := |
||||
ILDLIBS := |
||||
|
||||
# Backend-specific flags. CUDA adds `-x cu` and the HIP include directory as a
# system include path; HIP sets default warning flags only when CXXFLAGS is not
# already defined; any other value aborts with an error.
ifeq ($(GPU_RUNTIME), CUDA) |
||||
ICXXFLAGS += -x cu |
||||
ICPPFLAGS += -isystem $(HIP_INCLUDE_DIR) |
||||
else ifeq ($(GPU_RUNTIME), HIP) |
||||
CXXFLAGS ?= -Wall -Wextra |
||||
else |
||||
$(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be either CUDA or HIP) |
||||
endif |
||||
|
||||
# Append the conventional flag variables after the flags set above.
ICXXFLAGS += $(CXXFLAGS) |
||||
ICPPFLAGS += $(CPPFLAGS) |
||||
ILDFLAGS += $(LDFLAGS) |
||||
ILDLIBS += $(LDLIBS) |
||||
|
||||
# Build the example. Only the first prerequisite ($<, main.hip) is passed to the
# compiler; the two headers are listed so that editing them triggers a rebuild.
$(EXAMPLE): main.hip $(COMMON_INCLUDE_DIR)/example_utils.hpp $(COMMON_INCLUDE_DIR)/cmdparser.hpp |
||||
$(HIPCXX) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $< $(ILDLIBS) |
||||
|
||||
# Remove the built executable.
clean: |
||||
$(RM) $(EXAMPLE) |
||||
|
||||
# `clean` does not produce a file of that name.
.PHONY: clean
@ -0,0 +1,60 @@
@@ -0,0 +1,60 @@
|
||||
# Applications Floyd-Warshall Example |
||||
|
||||
## Description |
||||
This example showcases a GPU implementation of the [Floyd-Warshall algorithm](https://en.wikipedia.org/wiki/Floyd%E2%80%93Warshall_algorithm), which computes the shortest path between each pair of nodes in a given directed and (in this case) complete graph $G = (V, E, \omega)$. The key point of this implementation is that each kernel launch represents a step $k$ of the traditional CPU-implemented algorithm. Therefore, the kernel is launched as many times as there are nodes in the graph $\left(n = \vert V \vert \right)$. |
||||
|
||||
In this example, there are `iterations` (consecutive) executions of the algorithm on the same graph. As each execution requires an unmodified graph input, multiple copy operations are required. Hence, the performance of the example can be improved by using _pinned memory_. |
||||
|
||||
Pinned memory is simply a special kind of memory that cannot be paged out of the physical memory of a process, meaning that the virtual addresses associated with it are always mapped to physical memory. When copying data from/to the host to/from the GPU, the host source/destination must be pinned memory and, in case it is not, an extra allocation of pinned memory is first performed (copying the data residing in or being copied to the non-pinned host memory) and then the actual copy of the data takes place. |
||||
|
||||
Therefore, using pinned memory saves around 2x the time needed to copy from/to host memory. In this example, performance is improved by using this type of memory, given that there are `iterations` (consecutive) executions of the algorithm on the same graph. |
||||
|
||||
### Application flow |
||||
1. Default values for the number of nodes of the graph and the number of iterations for the algorithm execution are set. |
||||
2. Command line arguments are parsed (if any) and the previous values are updated. |
||||
3. A number of constants are defined for kernel execution and input/output data size. |
||||
4. Host memory is allocated for the distance matrix and initialized with the increasing sequence $1,2,3,\dots$ . These values represent the weights of the edges of the graph. |
||||
5. Host memory is allocated for the adjacency matrix and initialized such that the initial path between each pair of vertices $x,y \in V$ ($x \neq y$) is the edge $(x,y)$. |
||||
6. Pinned memory is allocated and mapped to device memory. The latter is initialized with the input matrices (distance and adjacency) representing the graph $G$ and the Floyd-Warshall kernel is executed for each node of the graph. |
||||
7. The resulting distance and adjacency matrices are copied to the host and pinned memory is freed. |
||||
8. The mean time in milliseconds needed for each iteration is printed to standard output. |
||||
9. The results obtained are compared with the CPU implementation of the algorithm. The result of the comparison is printed to the standard output. |
||||
|
||||
|
||||
### Command line interface |
||||
There are three parameters available: |
||||
- `-h` displays information about the available parameters and their default values. |
||||
- `-n nodes` sets `nodes` as the number of nodes of the graph to which the Floyd-Warshall algorithm will be applied. It must be a (positive) multiple of `block_size` (= 16). Its default value is 16. |
||||
- `-i iterations` sets `iterations` as the number of times that the algorithm will be applied to the (same) graph. It must be an integer greater than 0. Its default value is 1. |
||||
|
||||
## Key APIs and Concepts |
||||
- For this GPU implementation of the Floyd-Warshall algorithm, the main kernel (`floyd_warshall_kernel`) is launched in a 2-dimensional grid. Each thread in the grid computes the shortest path between two nodes of the graph at a certain step $k$ $\left(0 \leq k < n \right)$. The threads compare the previously computed shortest paths using only the nodes in $V'=\{v_0,v_1,...,v_{k-1}\} \subseteq V$ as intermediate nodes with the paths that include node $v_k$ as an intermediate node, and take the shortest option. Therefore, the kernel is launched $n$ times. |
||||
- For improved performance, pinned memory is used to pass the results obtained in each iteration to the next one. With `hipHostMalloc` pinned host memory (accessible by the device) can be allocated, and `hipHostFree` frees it. In this example, host pinned memory is allocated using the `hipHostMallocMapped` flag, which indicates that `hipHostMalloc` must map the allocation into the address space of the current device. The device pointer to such allocated pinned memory is obtained with `hipHostGetDevicePointer`. Beware that an excessive allocation of pinned memory can slow down the host execution, as the program is left with less physical memory available to map the rest of the virtual addresses used. |
||||
- With `hipMemcpy` data bytes can be transferred from host to device (using `hipMemcpyHostToDevice`) or from device to host (using `hipMemcpyDeviceToHost`), among others. |
||||
- `hipLaunchKernelGGL` queues the kernel execution on the device. All the kernels are launched on the `hipStreamDefault`, meaning that these executions are performed in order. `hipGetLastError` returns the last error produced by any runtime API call, allowing to check if any kernel launch resulted in error. |
||||
- `hipEventCreate` creates the events used to measure kernel execution time, `hipEventRecord` starts recording an event and `hipEventSynchronize` waits for all the previous work in the stream when the specified event was recorded. With these three functions the start and stop times of the kernel can be measured, and with `hipEventElapsedTime` the kernel execution time (in milliseconds) can be obtained. |
||||
|
||||
## Demonstrated API Calls |
||||
|
||||
### HIP runtime |
||||
#### Device symbols |
||||
- `blockIdx` |
||||
- `blockDim` |
||||
- `threadIdx` |
||||
|
||||
#### Host symbols |
||||
- `__global__` |
||||
- `hipEventCreate` |
||||
- `hipEventElapsedTime` |
||||
- `hipEventRecord` |
||||
- `hipEventSynchronize` |
||||
- `hipGetLastError` |
||||
- `hipHostFree` |
||||
- `hipHostGetDevicePointer` |
||||
- `hipHostMalloc` |
||||
- `hipHostMallocMapped` |
||||
- `hipLaunchKernelGGL` |
||||
- `hipMemcpy` |
||||
- `hipMemcpyDeviceToHost` |
||||
- `hipMemcpyHostToDevice` |
||||
- `hipStreamDefault` |
@ -0,0 +1,104 @@
@@ -0,0 +1,104 @@
|
||||
<?xml version="1.0" encoding="utf-8"?> |
||||
<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> |
||||
<ItemGroup Label="ProjectConfigurations"> |
||||
<ProjectConfiguration Include="Debug|x64"> |
||||
<Configuration>Debug</Configuration> |
||||
<Platform>x64</Platform> |
||||
</ProjectConfiguration> |
||||
<ProjectConfiguration Include="Release|x64"> |
||||
<Configuration>Release</Configuration> |
||||
<Platform>x64</Platform> |
||||
</ProjectConfiguration> |
||||
</ItemGroup> |
||||
<ItemGroup> |
||||
<ClCompile Include="main.hip" /> |
||||
</ItemGroup> |
||||
<ItemGroup> |
||||
<ClInclude Include="..\..\Common\example_utils.hpp" /> |
||||
</ItemGroup> |
||||
<ItemGroup> |
||||
<ClInclude Include="..\..\Common\cmdparser.hpp" /> |
||||
</ItemGroup> |
||||
<PropertyGroup Label="Globals"> |
||||
<VCProjectVersion>15.0</VCProjectVersion> |
||||
<ProjectGuid>{FB6B7014-2BC9-475C-B3CC-FEE6B4C5B103}</ProjectGuid> |
||||
<Keyword>Win32Proj</Keyword> |
||||
<RootNamespace>floyd_warshall_vs2019</RootNamespace> |
||||
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion> |
||||
</PropertyGroup> |
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration"> |
||||
<ConfigurationType>Application</ConfigurationType> |
||||
<UseDebugLibraries>true</UseDebugLibraries> |
||||
<PlatformToolset>HIP</PlatformToolset> |
||||
<CharacterSet>Unicode</CharacterSet> |
||||
</PropertyGroup> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration"> |
||||
<ConfigurationType>Application</ConfigurationType> |
||||
<UseDebugLibraries>false</UseDebugLibraries> |
||||
<PlatformToolset>HIP</PlatformToolset> |
||||
<WholeProgramOptimization>true</WholeProgramOptimization> |
||||
<CharacterSet>Unicode</CharacterSet> |
||||
</PropertyGroup> |
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> |
||||
<ImportGroup Label="ExtensionSettings"> |
||||
<Import Condition="'$(HIPPropertiesImported)' != 'true'" Project="$(VCTargetsPath)\AMD.HIP.Common.props" /> |
||||
</ImportGroup> |
||||
<ImportGroup Label="Shared"> |
||||
</ImportGroup> |
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> |
||||
</ImportGroup> |
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> |
||||
</ImportGroup> |
||||
<PropertyGroup Label="UserMacros" /> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<LinkIncremental>true</LinkIncremental> |
||||
<TargetName>applications_$(ProjectName)</TargetName> |
||||
</PropertyGroup> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<LinkIncremental>false</LinkIncremental> |
||||
<TargetName>applications_$(ProjectName)</TargetName> |
||||
</PropertyGroup> |
||||
<PropertyGroup Label="HIP" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<TargetGPUArchitectures>gfx1030</TargetGPUArchitectures> |
||||
</PropertyGroup> |
||||
<PropertyGroup Label="HIP" Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<TargetGPUArchitectures>gfx1030</TargetGPUArchitectures> |
||||
</PropertyGroup> |
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<ClCompile> |
||||
<WarningLevel>Level2</WarningLevel> |
||||
<PreprocessorDefinitions>__HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions> |
||||
<LanguageStandard>stdcpp17</LanguageStandard> |
||||
<AdditionalIncludeDirectories>$(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> |
||||
<RuntimeTypeInfo>true</RuntimeTypeInfo> |
||||
</ClCompile> |
||||
<Link> |
||||
<SubSystem>Console</SubSystem> |
||||
<GenerateDebugInformation>true</GenerateDebugInformation> |
||||
</Link> |
||||
</ItemDefinitionGroup> |
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<ClCompile> |
||||
<WarningLevel>Level2</WarningLevel> |
||||
<FunctionLevelLinking>true</FunctionLevelLinking> |
||||
<IntrinsicFunctions>true</IntrinsicFunctions> |
||||
<PreprocessorDefinitions>__HIP_ROCclr__;__clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions> |
||||
<LanguageStandard>stdcpp17</LanguageStandard> |
||||
<AdditionalIncludeDirectories>$(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> |
||||
<RuntimeTypeInfo>true</RuntimeTypeInfo> |
||||
</ClCompile> |
||||
<Link> |
||||
<SubSystem>Console</SubSystem> |
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding> |
||||
<OptimizeReferences>true</OptimizeReferences> |
||||
<GenerateDebugInformation>true</GenerateDebugInformation> |
||||
</Link> |
||||
</ItemDefinitionGroup> |
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> |
||||
<ImportGroup Label="ExtensionTargets"> |
||||
<Import Condition="'$(HIPTargetsImported)' != 'true'" Project="$(VCTargetsPath)\AMD.HIP.Common.targets" /> |
||||
</ImportGroup> |
||||
</Project> |
@ -0,0 +1,281 @@
@@ -0,0 +1,281 @@
|
||||
// MIT License |
||||
// |
||||
// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. |
||||
// |
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
// of this software and associated documentation files (the "Software"), to deal |
||||
// in the Software without restriction, including without limitation the rights |
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||
// copies of the Software, and to permit persons to whom the Software is |
||||
// furnished to do so, subject to the following conditions: |
||||
// |
||||
// The above copyright notice and this permission notice shall be included in all |
||||
// copies or substantial portions of the Software. |
||||
// |
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
||||
// SOFTWARE. |
||||
|
||||
#include "cmdparser.hpp" |
||||
#include "example_utils.hpp" |
||||
|
||||
#include <hip/hip_runtime.h> |
||||
|
||||
#include <cassert> |
||||
#include <iostream> |
||||
#include <numeric> |
||||
#include <vector> |
||||
|
||||
/// \brief Implements the k-th (0 <= k < nodes) step of Floyd-Warshall algorithm. That is, |
||||
/// given a directed and weighted graph G = (V,E,w) (also complete in this example), it |
||||
/// computes the shortest path between every pair of vertices only considering as intermediate |
||||
/// nodes in the path the ones in the subset V' = {v_0,v_1,...,v_k} of V. |
||||
__global__ void floyd_warshall_kernel(unsigned int* part_adjacency_matrix, |
||||
unsigned int* part_next_matrix, |
||||
const unsigned int nodes, |
||||
const unsigned int k) |
||||
{ |
||||
// Compute the vertices which shortest path each thread is going to process. |
||||
int x = blockIdx.x * blockDim.x + threadIdx.x; |
||||
int y = blockIdx.y * blockDim.y + threadIdx.y; |
||||
|
||||
// Get the current distance between the two vertices (only with intermediate nodes in |
||||
// {v_0,v_1,...,v_{k-1}}) and compute the distance using node v_k as intermediate. Note that |
||||
// d_x_k_y is the shortest path between x and y with node v_k as intermediate, because |
||||
// otherwise we could find a shorter path between y and v_k or/and v_k and x using intermediate |
||||
// nodes from {v_0,v_1,...,v_{k-1}} and thus contradicting the fact that the current paths |
||||
// between those two pairs of nodes are already the shortest possible. |
||||
int d_x_y = part_adjacency_matrix[y * nodes + x]; |
||||
int d_x_k_y = part_adjacency_matrix[y * nodes + k] + part_adjacency_matrix[k * nodes + x]; |
||||
|
||||
// If the path with intermediate nodes in {v_0, ..., v_{k-1}} is longer than the one |
||||
// with intermediate node v_k, update matrices so the latter is selected as the |
||||
// shortest path between x and y with intermediate nodes in {v_0, ..., v_k}. |
||||
if(d_x_k_y < d_x_y) |
||||
{ |
||||
part_adjacency_matrix[y * nodes + x] = d_x_k_y; |
||||
part_next_matrix[y * nodes + x] = k; |
||||
} |
||||
} |
||||
|
||||
/// \brief CPU version of the Floyd-Warshall algorithm, used to verify the GPU results.
///
/// \param adjacency_matrix Distance matrix indexed as [x * nodes + y]; updated in place.
/// \param next_matrix Matrix with the same layout; entry (x, y) is set to k whenever the
/// distance from x to y is improved by going through node k.
/// \param nodes Number of rows (and columns) of both matrices.
void floyd_warshall_reference(unsigned int*      adjacency_matrix,
                              unsigned int*      next_matrix,
                              const unsigned int nodes)
{
    for(unsigned int k = 0; k < nodes; ++k)
    {
        // Offset of the row that belongs to the intermediate node v_k.
        const unsigned int row_k = k * nodes;

        for(unsigned int x = 0; x < nodes; ++x)
        {
            const unsigned int row_x = x * nodes;

            for(unsigned int y = 0; y < nodes; ++y)
            {
                const unsigned int index = row_x + y;

                // Length of the path from x to y that passes through v_k, built from the
                // shortest known x -> v_k and v_k -> y distances.
                const unsigned int through_k
                    = adjacency_matrix[row_x + k] + adjacency_matrix[row_k + y];

                // Keep the current entry unless the detour through v_k is strictly shorter.
                if(through_k >= adjacency_matrix[index])
                {
                    continue;
                }

                adjacency_matrix[index] = through_k;
                next_matrix[index]      = k;
            }
        }
    }
}
||||
|
||||
/// \brief Adds to a command line parser the necessary options for this example. |
||||
template<unsigned int BlockSize> |
||||
void configure_parser(cli::Parser& parser) |
||||
{ |
||||
// Default parameters. |
||||
constexpr unsigned int nodes = 16; |
||||
constexpr unsigned int iterations = 1; |
||||
|
||||
static_assert(((nodes % BlockSize == 0)), |
||||
"Number of nodes must be a positive multiple of BlockSize"); |
||||
static_assert(((iterations > 0)), "Number of iterations must be at least 1"); |
||||
|
||||
// Add options to the command line parser. |
||||
parser.set_optional<unsigned int>("n", "nodes", nodes, "Number of nodes in the graph."); |
||||
parser.set_optional<unsigned int>("i", |
||||
"iterations", |
||||
iterations, |
||||
"Number of times the algorithm is executed."); |
||||
} |
||||
|
||||
int main(int argc, char* argv[]) |
||||
{ |
||||
// Number of threads in each kernel block dimension. |
||||
constexpr unsigned int block_size = 16; |
||||
|
||||
// Parse user input. |
||||
cli::Parser parser(argc, argv); |
||||
configure_parser<block_size>(parser); |
||||
parser.run_and_exit_if_error(); |
||||
|
||||
// Get number of nodes and iterations from the command line, if provided. |
||||
const unsigned int nodes = parser.get<unsigned int>("n"); |
||||
const unsigned int iterations = parser.get<unsigned int>("i"); |
||||
|
||||
// Check values provided. |
||||
if(nodes % block_size) |
||||
{ |
||||
std::cout << "Number of nodes must be a positive multiple of block_size (" |
||||
<< std::to_string(block_size) << ")." << std::endl; |
||||
exit(0); |
||||
} |
||||
if(iterations == 0) |
||||
{ |
||||
std::cout << "Number of iterations must be at least 1." << std::endl; |
||||
exit(0); |
||||
} |
||||
|
||||
// Total number of elements and bytes of the input matrices. |
||||
const unsigned int size = nodes * nodes; |
||||
const unsigned int size_bytes = nodes * nodes * sizeof(unsigned int); |
||||
|
||||
// Number of threads in each kernel block and number of blocks in the grid. |
||||
const dim3 block_dim(block_size, block_size); |
||||
const dim3 grid_dim(nodes / block_size, nodes / block_size); |
||||
|
||||
// Allocate host input adjacency matrix initialized with the increasing sequence 1,2,3,... . |
||||
// Overwrite diagonal values (distance from a node to itself) to 0. |
||||
std::vector<unsigned int> adjacency_matrix(size); |
||||
std::iota(adjacency_matrix.begin(), adjacency_matrix.end(), 1); |
||||
for(unsigned int x = 0; x < nodes; x++) |
||||
{ |
||||
adjacency_matrix[x * nodes + x] = 0; |
||||
} |
||||
|
||||
// Allocate host input matrix for the reconstruction of the paths obtained and initialize such |
||||
// that the path from node x to node y is just the edge (x,y) for any pair of nodes x and y. |
||||
std::vector<unsigned int> next_matrix(size); |
||||
for(unsigned int x = 0; x < nodes; x++) |
||||
{ |
||||
for(unsigned int y = 0; y < x; y++) |
||||
{ |
||||
next_matrix[x * nodes + y] = x; |
||||
next_matrix[y * nodes + x] = y; |
||||
} |
||||
next_matrix[x * nodes + x] = x; |
||||
} |
||||
|
||||
// Allocate host memory for the CPU implementation and copy input data. |
||||
std::vector<unsigned int> expected_adjacency_matrix(adjacency_matrix); |
||||
std::vector<unsigned int> expected_next_matrix(next_matrix); |
||||
|
||||
// Declare host input (pinned) memory for incremental results from kernel executions. |
||||
unsigned int* part_adjacency_matrix = nullptr; |
||||
unsigned int* part_next_matrix = nullptr; |
||||
|
||||
// Cumulative variable to compute the mean time per iteration of the algorithm. |
||||
double kernel_time = 0; |
||||
|
||||
std::cout << "Executing Floyd-Warshall algorithm for " << iterations |
||||
<< " iterations with a complete graph of " << nodes << " nodes." << std::endl; |
||||
|
||||
// Allocate pinned host memory mapped to device memory. |
||||
HIP_CHECK(hipHostMalloc(&part_adjacency_matrix, size_bytes, hipHostMallocMapped)); |
||||
HIP_CHECK(hipHostMalloc(&part_next_matrix, size_bytes, hipHostMallocMapped)); |
||||
|
||||
// Get device pointer to pinned host memory allocations for the input matrices. |
||||
float *d_adjacency_matrix, *d_next_matrix; |
||||
HIP_CHECK( |
||||
hipHostGetDevicePointer((void**)&d_adjacency_matrix, part_adjacency_matrix, 0 /*flags*/)); |
||||
HIP_CHECK(hipHostGetDevicePointer((void**)&d_next_matrix, part_next_matrix, 0 /*flags*/)); |
||||
|
||||
// Run iterations times the Floyd-Warshall GPU algorithm. |
||||
for(unsigned int i = 0; i < iterations; ++i) |
||||
{ |
||||
// Copy input data from host to device memory. |
||||
HIP_CHECK(hipMemcpy(d_adjacency_matrix, |
||||
adjacency_matrix.data(), |
||||
size_bytes, |
||||
hipMemcpyHostToDevice)); |
||||
HIP_CHECK(hipMemcpy(d_next_matrix, next_matrix.data(), size_bytes, hipMemcpyHostToDevice)); |
||||
|
||||
// Create events to measure the execution time of the kernels. |
||||
hipEvent_t start, stop; |
||||
HIP_CHECK(hipEventCreate(&start)); |
||||
HIP_CHECK(hipEventCreate(&stop)); |
||||
float kernel_ms{}; |
||||
|
||||
// Floyd-Warshall GPU algorithm: launch Floyd-Warshall kernel for each node of the graph. |
||||
for(unsigned int k = 0; k < nodes; ++k) |
||||
{ |
||||
// Record the start event. |
||||
HIP_CHECK(hipEventRecord(start, hipStreamDefault)); |
||||
|
||||
// Launch Floyd-Warshall kernel on the default stream. |
||||
hipLaunchKernelGGL(floyd_warshall_kernel, |
||||
grid_dim, |
||||
block_dim, |
||||
0, |
||||
hipStreamDefault, |
||||
part_adjacency_matrix, |
||||
part_next_matrix, |
||||
nodes, |
||||
k); |
||||
|
||||
// Check if the kernel launch was successful. |
||||
HIP_CHECK(hipGetLastError()); |
||||
|
||||
// Record the stop event and wait until the kernel execution finishes. |
||||
HIP_CHECK(hipEventRecord(stop, hipStreamDefault)); |
||||
HIP_CHECK(hipEventSynchronize(stop)); |
||||
|
||||
// Get the execution time of the kernel and add it to the total count. |
||||
HIP_CHECK(hipEventElapsedTime(&kernel_ms, start, stop)); |
||||
kernel_time += kernel_ms; |
||||
} |
||||
} |
||||
|
||||
// Copy results back to host. |
||||
HIP_CHECK( |
||||
hipMemcpy(adjacency_matrix.data(), d_adjacency_matrix, size_bytes, hipMemcpyDeviceToHost)); |
||||
HIP_CHECK(hipMemcpy(next_matrix.data(), d_next_matrix, size_bytes, hipMemcpyDeviceToHost)); |
||||
|
||||
// Free device memory. |
||||
HIP_CHECK(hipHostFree(part_adjacency_matrix)); |
||||
HIP_CHECK(hipHostFree(part_next_matrix)); |
||||
|
||||
// Print the mean time per iteration (in miliseconds) of the algorithm. |
||||
kernel_time /= iterations; |
||||
std::cout << "The mean time needed for each iteration has been " << kernel_time << "ms." |
||||
<< std::endl; |
||||
|
||||
// Execute CPU algorithm. |
||||
floyd_warshall_reference(expected_adjacency_matrix.data(), expected_next_matrix.data(), nodes); |
||||
|
||||
// Verify results. |
||||
unsigned int errors = 0; |
||||
std::cout << "Validating results with CPU implementation." << std::endl; |
||||
for(unsigned int i = 0; i < size; ++i) |
||||
{ |
||||
errors += (adjacency_matrix[i] - expected_adjacency_matrix[i] != 0); |
||||
errors += (next_matrix[i] - expected_next_matrix[i] != 0); |
||||
} |
||||
|
||||
if(errors) |
||||
{ |
||||
std::cout << "Validation failed with " << errors << " errors." << std::endl; |
||||
return error_exit_code; |
||||
} |
||||
else |
||||
{ |
||||
std::cout << "Validation passed." << std::endl; |
||||
} |
||||
} |
@ -0,0 +1,311 @@
@@ -0,0 +1,311 @@
|
||||
#ifndef __khrplatform_h_ |
||||
#define __khrplatform_h_ |
||||
|
||||
/*
|
||||
** Copyright (c) 2008-2018 The Khronos Group Inc. |
||||
** |
||||
** Permission is hereby granted, free of charge, to any person obtaining a |
||||
** copy of this software and/or associated documentation files (the |
||||
** "Materials"), to deal in the Materials without restriction, including |
||||
** without limitation the rights to use, copy, modify, merge, publish, |
||||
** distribute, sublicense, and/or sell copies of the Materials, and to |
||||
** permit persons to whom the Materials are furnished to do so, subject to |
||||
** the following conditions: |
||||
** |
||||
** The above copyright notice and this permission notice shall be included |
||||
** in all copies or substantial portions of the Materials. |
||||
** |
||||
** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||||
** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||||
** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
||||
** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
||||
** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||||
** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||||
** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. |
||||
*/ |
||||
|
||||
/* Khronos platform-specific types and definitions.
|
||||
* |
||||
* The master copy of khrplatform.h is maintained in the Khronos EGL |
||||
* Registry repository at https://github.com/KhronosGroup/EGL-Registry
|
||||
* The last semantic modification to khrplatform.h was at commit ID: |
||||
* 67a3e0864c2d75ea5287b9f3d2eb74a745936692 |
||||
* |
||||
* Adopters may modify this file to suit their platform. Adopters are |
||||
* encouraged to submit platform specific modifications to the Khronos |
||||
* group so that they can be included in future versions of this file. |
||||
* Please submit changes by filing pull requests or issues on |
||||
* the EGL Registry repository linked above. |
||||
* |
||||
* |
||||
* See the Implementer's Guidelines for information about where this file |
||||
* should be located on your system and for more details of its use: |
||||
* http://www.khronos.org/registry/implementers_guide.pdf
|
||||
* |
||||
* This file should be included as |
||||
* #include <KHR/khrplatform.h> |
||||
* by Khronos client API header files that use its types and defines. |
||||
* |
||||
* The types in khrplatform.h should only be used to define API-specific types. |
||||
* |
||||
* Types defined in khrplatform.h: |
||||
* khronos_int8_t signed 8 bit |
||||
* khronos_uint8_t unsigned 8 bit |
||||
* khronos_int16_t signed 16 bit |
||||
* khronos_uint16_t unsigned 16 bit |
||||
* khronos_int32_t signed 32 bit |
||||
* khronos_uint32_t unsigned 32 bit |
||||
* khronos_int64_t signed 64 bit |
||||
* khronos_uint64_t unsigned 64 bit |
||||
* khronos_intptr_t signed same number of bits as a pointer |
||||
* khronos_uintptr_t unsigned same number of bits as a pointer |
||||
* khronos_ssize_t signed size |
||||
* khronos_usize_t unsigned size |
||||
* khronos_float_t signed 32 bit floating point |
||||
* khronos_time_ns_t unsigned 64 bit time in nanoseconds |
||||
* khronos_utime_nanoseconds_t unsigned time interval or absolute time in |
||||
* nanoseconds |
||||
* khronos_stime_nanoseconds_t signed time interval in nanoseconds |
||||
* khronos_boolean_enum_t enumerated boolean type. This should |
||||
* only be used as a base type when a client API's boolean type is |
||||
* an enum. Client APIs which use an integer or other type for |
||||
* booleans cannot use this as the base type for their boolean. |
||||
* |
||||
* Tokens defined in khrplatform.h: |
||||
* |
||||
* KHRONOS_FALSE, KHRONOS_TRUE Enumerated boolean false/true values. |
||||
* |
||||
* KHRONOS_SUPPORT_INT64 is 1 if 64 bit integers are supported; otherwise 0. |
||||
* KHRONOS_SUPPORT_FLOAT is 1 if floats are supported; otherwise 0. |
||||
* |
||||
* Calling convention macros defined in this file: |
||||
* KHRONOS_APICALL |
||||
* KHRONOS_APIENTRY |
||||
* KHRONOS_APIATTRIBUTES |
||||
* |
||||
* These may be used in function prototypes as: |
||||
* |
||||
* KHRONOS_APICALL void KHRONOS_APIENTRY funcname( |
||||
* int arg1, |
||||
* int arg2) KHRONOS_APIATTRIBUTES; |
||||
*/ |
||||
|
||||
#if defined(__SCITECH_SNAP__) && !defined(KHRONOS_STATIC) |
||||
# define KHRONOS_STATIC 1 |
||||
#endif |
||||
|
||||
/*-------------------------------------------------------------------------
|
||||
* Definition of KHRONOS_APICALL |
||||
*------------------------------------------------------------------------- |
||||
* This precedes the return type of the function in the function prototype. |
||||
*/ |
||||
#if defined(KHRONOS_STATIC) |
||||
/* If the preprocessor constant KHRONOS_STATIC is defined, make the
|
||||
* header compatible with static linking. */ |
||||
# define KHRONOS_APICALL |
||||
#elif defined(_WIN32) |
||||
# define KHRONOS_APICALL __declspec(dllimport) |
||||
#elif defined (__SYMBIAN32__) |
||||
# define KHRONOS_APICALL IMPORT_C |
||||
#elif defined(__ANDROID__) |
||||
# define KHRONOS_APICALL __attribute__((visibility("default"))) |
||||
#else |
||||
# define KHRONOS_APICALL |
||||
#endif |
||||
|
||||
/*-------------------------------------------------------------------------
|
||||
* Definition of KHRONOS_APIENTRY |
||||
*------------------------------------------------------------------------- |
||||
* This follows the return type of the function and precedes the function |
||||
* name in the function prototype. |
||||
*/ |
||||
#if defined(_WIN32) && !defined(_WIN32_WCE) && !defined(__SCITECH_SNAP__) |
||||
/* Win32 but not WinCE */ |
||||
# define KHRONOS_APIENTRY __stdcall |
||||
#else |
||||
# define KHRONOS_APIENTRY |
||||
#endif |
||||
|
||||
/*-------------------------------------------------------------------------
|
||||
* Definition of KHRONOS_APIATTRIBUTES |
||||
*------------------------------------------------------------------------- |
||||
* This follows the closing parenthesis of the function prototype arguments. |
||||
*/ |
||||
#if defined (__ARMCC_2__) |
||||
#define KHRONOS_APIATTRIBUTES __softfp |
||||
#else |
||||
#define KHRONOS_APIATTRIBUTES |
||||
#endif |
||||
|
||||
/*-------------------------------------------------------------------------
|
||||
* basic type definitions |
||||
*-----------------------------------------------------------------------*/ |
||||
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || defined(__GNUC__) || defined(__SCO__) || defined(__USLC__) |
||||
|
||||
|
||||
/*
|
||||
* Using <stdint.h> |
||||
*/ |
||||
#include <stdint.h> |
||||
typedef int32_t khronos_int32_t; |
||||
typedef uint32_t khronos_uint32_t; |
||||
typedef int64_t khronos_int64_t; |
||||
typedef uint64_t khronos_uint64_t; |
||||
#define KHRONOS_SUPPORT_INT64 1 |
||||
#define KHRONOS_SUPPORT_FLOAT 1 |
||||
/*
|
||||
* To support platform where unsigned long cannot be used interchangeably with |
||||
* intptr_t (e.g. CHERI-extended ISAs), we can use the stdint.h intptr_t. |
||||
* Ideally, we could just use (u)intptr_t everywhere, but this could result in |
||||
* ABI breakage if khronos_uintptr_t is changed from unsigned long to |
||||
* unsigned long long or similar (this results in different C++ name mangling). |
||||
* To avoid changes for existing platforms, we restrict usage of intptr_t to |
||||
* platforms where the size of a pointer is larger than the size of long. |
||||
*/ |
||||
#if defined(__SIZEOF_LONG__) && defined(__SIZEOF_POINTER__) |
||||
#if __SIZEOF_POINTER__ > __SIZEOF_LONG__ |
||||
#define KHRONOS_USE_INTPTR_T |
||||
#endif |
||||
#endif |
||||
|
||||
#elif defined(__VMS ) || defined(__sgi) |
||||
|
||||
/*
|
||||
* Using <inttypes.h> |
||||
*/ |
||||
#include <inttypes.h> |
||||
typedef int32_t khronos_int32_t; |
||||
typedef uint32_t khronos_uint32_t; |
||||
typedef int64_t khronos_int64_t; |
||||
typedef uint64_t khronos_uint64_t; |
||||
#define KHRONOS_SUPPORT_INT64 1 |
||||
#define KHRONOS_SUPPORT_FLOAT 1 |
||||
|
||||
#elif defined(_WIN32) && !defined(__SCITECH_SNAP__) |
||||
|
||||
/*
|
||||
* Win32 |
||||
*/ |
||||
typedef __int32 khronos_int32_t; |
||||
typedef unsigned __int32 khronos_uint32_t; |
||||
typedef __int64 khronos_int64_t; |
||||
typedef unsigned __int64 khronos_uint64_t; |
||||
#define KHRONOS_SUPPORT_INT64 1 |
||||
#define KHRONOS_SUPPORT_FLOAT 1 |
||||
|
||||
#elif defined(__sun__) || defined(__digital__) |
||||
|
||||
/*
|
||||
* Sun or Digital |
||||
*/ |
||||
typedef int khronos_int32_t; |
||||
typedef unsigned int khronos_uint32_t; |
||||
#if defined(__arch64__) || defined(_LP64) |
||||
typedef long int khronos_int64_t; |
||||
typedef unsigned long int khronos_uint64_t; |
||||
#else |
||||
typedef long long int khronos_int64_t; |
||||
typedef unsigned long long int khronos_uint64_t; |
||||
#endif /* __arch64__ */ |
||||
#define KHRONOS_SUPPORT_INT64 1 |
||||
#define KHRONOS_SUPPORT_FLOAT 1 |
||||
|
||||
#elif 0 |
||||
|
||||
/*
|
||||
* Hypothetical platform with no float or int64 support |
||||
*/ |
||||
typedef int khronos_int32_t; |
||||
typedef unsigned int khronos_uint32_t; |
||||
#define KHRONOS_SUPPORT_INT64 0 |
||||
#define KHRONOS_SUPPORT_FLOAT 0 |
||||
|
||||
#else |
||||
|
||||
/*
|
||||
* Generic fallback |
||||
*/ |
||||
#include <stdint.h> |
||||
typedef int32_t khronos_int32_t; |
||||
typedef uint32_t khronos_uint32_t; |
||||
typedef int64_t khronos_int64_t; |
||||
typedef uint64_t khronos_uint64_t; |
||||
#define KHRONOS_SUPPORT_INT64 1 |
||||
#define KHRONOS_SUPPORT_FLOAT 1 |
||||
|
||||
#endif |
||||
|
||||
|
||||
/*
|
||||
* Types that are (so far) the same on all platforms |
||||
*/ |
||||
typedef signed char khronos_int8_t; |
||||
typedef unsigned char khronos_uint8_t; |
||||
typedef signed short int khronos_int16_t; |
||||
typedef unsigned short int khronos_uint16_t; |
||||
|
||||
/*
|
||||
* Types that differ between LLP64 and LP64 architectures - in LLP64, |
||||
* pointers are 64 bits, but 'long' is still 32 bits. Win64 appears |
||||
* to be the only LLP64 architecture in current use. |
||||
*/ |
||||
#ifdef KHRONOS_USE_INTPTR_T |
||||
typedef intptr_t khronos_intptr_t; |
||||
typedef uintptr_t khronos_uintptr_t; |
||||
#elif defined(_WIN64) |
||||
typedef signed long long int khronos_intptr_t; |
||||
typedef unsigned long long int khronos_uintptr_t; |
||||
#else |
||||
typedef signed long int khronos_intptr_t; |
||||
typedef unsigned long int khronos_uintptr_t; |
||||
#endif |
||||
|
||||
#if defined(_WIN64) |
||||
typedef signed long long int khronos_ssize_t; |
||||
typedef unsigned long long int khronos_usize_t; |
||||
#else |
||||
typedef signed long int khronos_ssize_t; |
||||
typedef unsigned long int khronos_usize_t; |
||||
#endif |
||||
|
||||
#if KHRONOS_SUPPORT_FLOAT |
||||
/*
|
||||
* Float type |
||||
*/ |
||||
typedef float khronos_float_t; |
||||
#endif |
||||
|
||||
#if KHRONOS_SUPPORT_INT64 |
||||
/* Time types
|
||||
* |
||||
* These types can be used to represent a time interval in nanoseconds or |
||||
* an absolute Unadjusted System Time. Unadjusted System Time is the number |
||||
* of nanoseconds since some arbitrary system event (e.g. since the last |
||||
* time the system booted). The Unadjusted System Time is an unsigned |
||||
* 64 bit value that wraps back to 0 every 584 years. Time intervals |
||||
* may be either signed or unsigned. |
||||
*/ |
||||
typedef khronos_uint64_t khronos_utime_nanoseconds_t; |
||||
typedef khronos_int64_t khronos_stime_nanoseconds_t; |
||||
#endif |
||||
|
||||
/*
|
||||
* Dummy value used to pad enum types to 32 bits. |
||||
*/ |
||||
#ifndef KHRONOS_MAX_ENUM |
||||
#define KHRONOS_MAX_ENUM 0x7FFFFFFF |
||||
#endif |
||||
|
||||
/*
|
||||
* Enumerated boolean type |
||||
* |
||||
* Values other than zero should be considered to be true. Therefore |
||||
* comparisons should not be made against KHRONOS_TRUE. |
||||
*/ |
||||
typedef enum { |
||||
KHRONOS_FALSE = 0, |
||||
KHRONOS_TRUE = 1, |
||||
KHRONOS_BOOLEAN_ENUM_FORCE_SIZE = KHRONOS_MAX_ENUM |
||||
} khronos_boolean_enum_t; |
||||
|
||||
#endif /* __khrplatform_h_ */ |
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,45 @@
@@ -0,0 +1,45 @@
|
||||
# HIP-Basic Examples |
||||
|
||||
## Summary |
||||
The examples in this subdirectory showcase the functionality of the HIP runtime. The examples build on Linux for the ROCm (AMD GPU) backend. Some examples additionally support Windows, and some additionally support the CUDA (NVIDIA GPU) backend. |
||||
|
||||
## Prerequisites |
||||
### Linux |
||||
- [CMake](https://cmake.org/download/) (at least version 3.21) |
||||
- OR GNU Make - available via the distribution's package manager |
||||
- [ROCm](https://docs.amd.com/bundle/ROCm-Installation-Guide-v5.1.3/page/Overview_of_ROCm_Installation_Methods.html) (at least version 5.x.x) |
||||
|
||||
### Windows |
||||
- [Visual Studio](https://visualstudio.microsoft.com/) 2019 or 2022 with the "Desktop Development with C++" workload |
||||
- ROCm toolchain for Windows (No public release yet) |
||||
- The Visual Studio ROCm extension needs to be installed to build with the solution files. |
||||
- [CMake](https://cmake.org/download/) (optional, to build with CMake. Requires at least version 3.21) |
||||
- [Ninja](https://ninja-build.org/) (optional, to build with CMake) |
||||
|
||||
## Building |
||||
### Linux |
||||
Make sure that the dependencies are installed, or use one of the [provided Dockerfiles](../../Dockerfiles/) to build and run the examples in a containerized environment. |
||||
|
||||
#### Using CMake |
||||
All examples in the `HIP-Basic` subdirectory can either be built by a single CMake project or be built independently. |
||||
|
||||
- `$ cd HIP-Basic` |
||||
- `$ cmake -S . -B build` (on ROCm) or `$ cmake -S . -B build -D GPU_RUNTIME=CUDA` (on CUDA, when supported) |
||||
- `$ cmake --build build` |
||||
|
||||
#### Using Make |
||||
All examples can be built by a single invocation to Make or be built independently. |
||||
|
||||
- `$ cd HIP-Basic` |
||||
- `$ make` (on ROCm) or `$ make GPU_RUNTIME=CUDA` (on CUDA, when supported) |
||||
|
||||
### Windows |
||||
Not all HIP runtime examples support building on Windows. See the README file in the directory of the example for more details. |
||||
|
||||
#### Visual Studio |
||||
Visual Studio solution files are available for the individual examples. To build all supported HIP runtime examples open the top level solution file [ROCm-Examples-VS2019.sln](../../ROCm-Examples-VS2019.sln) and filter for HIP-Basic. |
||||
|
||||
For more detailed build instructions refer to the top level [README.md](../../README.md#visual-studio). |
||||
|
||||
#### CMake |
||||
All examples in the `HIP-Basic` subdirectory can either be built by a single CMake project or be built independently. For build instructions refer to the top-level [README.md](../../README.md#cmake-2). |
@ -1,183 +1,183 @@
@@ -1,183 +1,183 @@
|
||||
<?xml version="1.0" encoding="utf-8"?> |
||||
<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> |
||||
<ItemGroup Label="ProjectConfigurations"> |
||||
<ProjectConfiguration Include="Debug|x64"> |
||||
<Configuration>Debug</Configuration> |
||||
<Platform>x64</Platform> |
||||
</ProjectConfiguration> |
||||
<ProjectConfiguration Include="Release|x64"> |
||||
<Configuration>Release</Configuration> |
||||
<Platform>x64</Platform> |
||||
</ProjectConfiguration> |
||||
</ItemGroup> |
||||
<ItemGroup> |
||||
<ClCompile Include="main.hip"> |
||||
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">--cuda-host-only</AdditionalOptions> |
||||
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">--cuda-host-only</AdditionalOptions> |
||||
</ClCompile> |
||||
</ItemGroup> |
||||
<ItemGroup> |
||||
<ClInclude Include="..\..\Common\example_utils.hpp" /> |
||||
</ItemGroup> |
||||
<ItemGroup> |
||||
<CustomBuild Include="hip_obj_gen_win.mcin"> |
||||
<FileType>Document</FileType> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">copy %(Identity) "$(IntDir)%(Identity)"</Command> |
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Copying %(Identity)</Message> |
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Identity)</Outputs> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">copy %(Identity) "$(IntDir)%(Identity)"</Command> |
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Copying %(Identity)</Message> |
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Identity)</Outputs> |
||||
</CustomBuild> |
||||
<CustomBuild Include="main_gfx1030.s"> |
||||
<FileType>Document</FileType> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx1030</Command> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx1030</Command> |
||||
</CustomBuild> |
||||
<CustomBuild Include="main_gfx803.s"> |
||||
<FileType>Document</FileType> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx803</Command> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx803</Command> |
||||
</CustomBuild> |
||||
<CustomBuild Include="main_gfx900.s">
  <!-- Assembles the gfx900 device assembly into $(IntDir)main_gfx900.o. The compiler path is
       quoted as a whole, exactly like the CustomBuild items of the other architectures. -->
  <FileType>Document</FileType>
  <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx900</Command>
  <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx900</Command>
</CustomBuild>
||||
<CustomBuild Include="main_gfx906.s"> |
||||
<FileType>Document</FileType> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx906</Command> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx906</Command> |
||||
</CustomBuild> |
||||
<CustomBuild Include="main_gfx908.s"> |
||||
<FileType>Document</FileType> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx908</Command> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx908</Command> |
||||
</CustomBuild> |
||||
<CustomBuild Include="main_gfx90a.s"> |
||||
<FileType>Document</FileType> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx90a </Command> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx90a </Command> |
||||
</CustomBuild> |
||||
</ItemGroup> |
||||
<PropertyGroup Label="Globals"> |
||||
<VCProjectVersion>15.0</VCProjectVersion> |
||||
<ProjectGuid>{60b4ade0-8286-46ae-b884-5da51b541ded}</ProjectGuid> |
||||
<Keyword>Win32Proj</Keyword> |
||||
<RootNamespace>assembly_to_executable_vs2019</RootNamespace> |
||||
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion> |
||||
</PropertyGroup> |
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration"> |
||||
<ConfigurationType>Application</ConfigurationType> |
||||
<UseDebugLibraries>true</UseDebugLibraries> |
||||
<PlatformToolset>HIP</PlatformToolset> |
||||
<CharacterSet>Unicode</CharacterSet> |
||||
</PropertyGroup> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration"> |
||||
<ConfigurationType>Application</ConfigurationType> |
||||
<UseDebugLibraries>false</UseDebugLibraries> |
||||
<PlatformToolset>HIP</PlatformToolset> |
||||
<WholeProgramOptimization>true</WholeProgramOptimization> |
||||
<CharacterSet>Unicode</CharacterSet> |
||||
</PropertyGroup> |
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> |
||||
<ImportGroup Label="ExtensionSettings"> |
||||
<Import Condition="'$(HIPPropertiesImported)' != 'true'" Project="$(VCTargetsPath)\AMD.HIP.Common.props" /> |
||||
</ImportGroup> |
||||
<ImportGroup Label="Shared"> |
||||
</ImportGroup> |
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> |
||||
</ImportGroup> |
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> |
||||
</ImportGroup> |
||||
<PropertyGroup Label="UserMacros" /> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<LinkIncremental>true</LinkIncremental> |
||||
<TargetName>hip_$(ProjectName)</TargetName> |
||||
<CustomBuildBeforeTargets>ClCompile</CustomBuildBeforeTargets> |
||||
</PropertyGroup> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<LinkIncremental>false</LinkIncremental> |
||||
<TargetName>hip_$(ProjectName)</TargetName> |
||||
<CustomBuildBeforeTargets>ClCompile</CustomBuildBeforeTargets> |
||||
</PropertyGroup> |
||||
<PropertyGroup Label="HIP" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<TargetGPUArchitectures>gfx1030</TargetGPUArchitectures> |
||||
</PropertyGroup> |
||||
<PropertyGroup Label="HIP" Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<TargetGPUArchitectures>gfx1030</TargetGPUArchitectures> |
||||
</PropertyGroup> |
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
  <!-- Debug build settings: host compilation, linking against the generated device object,
       and the custom steps that assemble and bundle the per-architecture device code. -->
  <ClCompile>
    <WarningLevel>Level1</WarningLevel>
    <PreprocessorDefinitions>__HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
    <LanguageStandard>stdcpp17</LanguageStandard>
    <AdditionalIncludeDirectories>$(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
  </ClCompile>
  <Link>
    <SubSystem>Console</SubSystem>
    <GenerateDebugInformation>true</GenerateDebugInformation>
    <AdditionalDependencies>$(IntDir)main_device.obj;%(AdditionalDependencies)</AdditionalDependencies>
  </Link>
  <!-- Default command for device assembly items; the items themselves add the -mcpu flag. -->
  <CustomBuild>
    <Message>Compiling Device Assembly %(Identity)</Message>
    <Command>"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa</Command>
    <Outputs>$(IntDir)%(FileName).o</Outputs>
  </CustomBuild>
  <!-- Bundles the device objects into offload_bundle.hipfb and then runs llvm-mc on
       hip_obj_gen_win.mcin to produce main_device.obj. -->
  <CustomBuildStep>
    <Command>"$(ClangToolPath)clang-offload-bundler" -type=o -bundle-align=4096 -targets=host-x86_64-pc-windows-msvc,hipv4-amdgcn-amd-amdhsa--gfx803,hipv4-amdgcn-amd-amdhsa--gfx900,hipv4-amdgcn-amd-amdhsa--gfx906,hipv4-amdgcn-amd-amdhsa--gfx908,hipv4-amdgcn-amd-amdhsa--gfx90a,hipv4-amdgcn-amd-amdhsa--gfx1030 -input=nul "-input=$(IntDir)main_gfx803.o" "-input=$(IntDir)main_gfx900.o" "-input=$(IntDir)main_gfx906.o" "-input=$(IntDir)main_gfx908.o" "-input=$(IntDir)main_gfx90a.o" "-input=$(IntDir)main_gfx1030.o" "-output=$(IntDir)offload_bundle.hipfb"
cd $(IntDir) &amp;&amp; "$(ClangToolPath)llvm-mc" -triple host-x86_64-pc-windows-msvc "hip_obj_gen_win.mcin" -o "main_device.obj" --filetype=obj</Command>
  </CustomBuildStep>
  <CustomBuildStep>
    <Message>Generating Device Offload Object</Message>
  </CustomBuildStep>
  <CustomBuildStep>
    <Outputs>$(IntDir)main_device.obj</Outputs>
  </CustomBuildStep>
  <!-- The .mcin input must use the same file name that the copy step writes to $(IntDir). -->
  <CustomBuildStep>
    <Inputs>$(IntDir)main_gfx803.o;$(IntDir)main_gfx900.o;$(IntDir)main_gfx906.o;$(IntDir)main_gfx908.o;$(IntDir)main_gfx90a.o;$(IntDir)main_gfx1030.o;$(IntDir)hip_obj_gen_win.mcin;%(Inputs)</Inputs>
  </CustomBuildStep>
</ItemDefinitionGroup>
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
  <!-- Release build settings: host compilation, linking against the generated device object,
       and the custom steps that assemble and bundle the per-architecture device code. -->
  <ClCompile>
    <WarningLevel>Level2</WarningLevel>
    <FunctionLevelLinking>true</FunctionLevelLinking>
    <IntrinsicFunctions>true</IntrinsicFunctions>
    <PreprocessorDefinitions>__HIP_ROCclr__;__clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
    <LanguageStandard>stdcpp17</LanguageStandard>
    <AdditionalIncludeDirectories>$(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
  </ClCompile>
  <Link>
    <SubSystem>Console</SubSystem>
    <EnableCOMDATFolding>true</EnableCOMDATFolding>
    <OptimizeReferences>true</OptimizeReferences>
    <GenerateDebugInformation>true</GenerateDebugInformation>
    <AdditionalDependencies>$(IntDir)main_device.obj;%(AdditionalDependencies)</AdditionalDependencies>
  </Link>
  <!-- Default command for device assembly items; the items themselves add the -mcpu flag. -->
  <CustomBuild>
    <Message>Compiling Device Assembly %(Identity)</Message>
    <Command>"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa</Command>
    <Outputs>$(IntDir)%(FileName).o</Outputs>
  </CustomBuildStep_PLACEHOLDER_REMOVED>
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> |
||||
<ImportGroup Label="ExtensionTargets"> |
||||
<Import Condition="'$(HIPTargetsImported)' != 'true'" Project="$(VCTargetsPath)\AMD.HIP.Common.targets" /> |
||||
</ImportGroup> |
||||
</Project> |
||||
<?xml version="1.0" encoding="utf-8"?> |
||||
<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> |
||||
<ItemGroup Label="ProjectConfigurations"> |
||||
<ProjectConfiguration Include="Debug|x64"> |
||||
<Configuration>Debug</Configuration> |
||||
<Platform>x64</Platform> |
||||
</ProjectConfiguration> |
||||
<ProjectConfiguration Include="Release|x64"> |
||||
<Configuration>Release</Configuration> |
||||
<Platform>x64</Platform> |
||||
</ProjectConfiguration> |
||||
</ItemGroup> |
||||
<ItemGroup> |
||||
<ClCompile Include="main.hip"> |
||||
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">--cuda-host-only</AdditionalOptions> |
||||
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">--cuda-host-only</AdditionalOptions> |
||||
</ClCompile> |
||||
</ItemGroup> |
||||
<ItemGroup> |
||||
<ClInclude Include="..\..\Common\example_utils.hpp" /> |
||||
</ItemGroup> |
||||
<ItemGroup> |
||||
<CustomBuild Include="hip_obj_gen_win.mcin"> |
||||
<FileType>Document</FileType> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">copy %(Identity) "$(IntDir)%(Identity)"</Command> |
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Copying %(Identity)</Message> |
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Identity)</Outputs> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">copy %(Identity) "$(IntDir)%(Identity)"</Command> |
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Copying %(Identity)</Message> |
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Identity)</Outputs> |
||||
</CustomBuild> |
||||
<CustomBuild Include="main_gfx1030.s"> |
||||
<FileType>Document</FileType> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx1030</Command> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx1030</Command> |
||||
</CustomBuild> |
||||
<CustomBuild Include="main_gfx803.s"> |
||||
<FileType>Document</FileType> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx803</Command> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx803</Command> |
||||
</CustomBuild> |
||||
<CustomBuild Include="main_gfx900.s">
  <FileType>Document</FileType>
  <!-- Assembles the gfx900 device source into $(IntDir)main_gfx900.o.
       The command is identical for both configurations. The quoted tool path
       now ends after the complete name "clang++", matching the entries for
       the other architectures (the closing quote used to sit between the two
       plus signs). -->
  <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx900</Command>
  <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx900</Command>
</CustomBuild>
||||
<CustomBuild Include="main_gfx906.s"> |
||||
<FileType>Document</FileType> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx906</Command> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx906</Command> |
||||
</CustomBuild> |
||||
<CustomBuild Include="main_gfx908.s"> |
||||
<FileType>Document</FileType> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx908</Command> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx908</Command> |
||||
</CustomBuild> |
||||
<CustomBuild Include="main_gfx90a.s"> |
||||
<FileType>Document</FileType> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx90a </Command> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx90a </Command> |
||||
</CustomBuild> |
||||
</ItemGroup> |
||||
<PropertyGroup Label="Globals"> |
||||
<VCProjectVersion>15.0</VCProjectVersion> |
||||
<ProjectGuid>{60b4ade0-8286-46ae-b884-5da51b541ded}</ProjectGuid> |
||||
<Keyword>Win32Proj</Keyword> |
||||
<RootNamespace>assembly_to_executable_vs2019</RootNamespace> |
||||
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion> |
||||
</PropertyGroup> |
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration"> |
||||
<ConfigurationType>Application</ConfigurationType> |
||||
<UseDebugLibraries>true</UseDebugLibraries> |
||||
<PlatformToolset>HIP</PlatformToolset> |
||||
<CharacterSet>Unicode</CharacterSet> |
||||
</PropertyGroup> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration"> |
||||
<ConfigurationType>Application</ConfigurationType> |
||||
<UseDebugLibraries>false</UseDebugLibraries> |
||||
<PlatformToolset>HIP</PlatformToolset> |
||||
<WholeProgramOptimization>true</WholeProgramOptimization> |
||||
<CharacterSet>Unicode</CharacterSet> |
||||
</PropertyGroup> |
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> |
||||
<ImportGroup Label="ExtensionSettings"> |
||||
<Import Condition="'$(HIPPropertiesImported)' != 'true'" Project="$(VCTargetsPath)\AMD.HIP.Common.props" /> |
||||
</ImportGroup> |
||||
<ImportGroup Label="Shared"> |
||||
</ImportGroup> |
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> |
||||
</ImportGroup> |
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> |
||||
</ImportGroup> |
||||
<PropertyGroup Label="UserMacros" /> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<LinkIncremental>true</LinkIncremental> |
||||
<TargetName>hip_$(ProjectName)</TargetName> |
||||
<CustomBuildBeforeTargets>ClCompile</CustomBuildBeforeTargets> |
||||
</PropertyGroup> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<LinkIncremental>false</LinkIncremental> |
||||
<TargetName>hip_$(ProjectName)</TargetName> |
||||
<CustomBuildBeforeTargets>ClCompile</CustomBuildBeforeTargets> |
||||
</PropertyGroup> |
||||
<PropertyGroup Label="HIP" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<TargetGPUArchitectures>gfx1030</TargetGPUArchitectures> |
||||
</PropertyGroup> |
||||
<PropertyGroup Label="HIP" Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<TargetGPUArchitectures>gfx1030</TargetGPUArchitectures> |
||||
</PropertyGroup> |
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<ClCompile> |
||||
<WarningLevel>Level2</WarningLevel> |
||||
<PreprocessorDefinitions>__HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions> |
||||
<LanguageStandard>stdcpp17</LanguageStandard> |
||||
<AdditionalIncludeDirectories>$(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> |
||||
</ClCompile> |
||||
<Link> |
||||
<SubSystem>Console</SubSystem> |
||||
<GenerateDebugInformation>true</GenerateDebugInformation> |
||||
<AdditionalDependencies>$(IntDir)main_device.obj;%(AdditionalDependencies)</AdditionalDependencies> |
||||
</Link> |
||||
<CustomBuild> |
||||
<Message>Compiling Device Assembly %(Identity)</Message> |
||||
<Command>"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa</Command> |
||||
<Outputs>$(IntDir)%(FileName).o</Outputs> |
||||
</CustomBuild> |
||||
<CustomBuildStep>
  <!-- Debug configuration: bundle the six per-architecture device objects
       into offload_bundle.hipfb with clang-offload-bundler (targets and
       inputs are listed in the same order; "nul" is the input given for the
       host target), then assemble hip_obj_gen_win.mcin into main_device.obj
       with llvm-mc from inside the intermediate directory.
       The shell operator between "cd" and llvm-mc is written with XML
       entities because a raw ampersand is not valid inside an element. -->
  <Command>"$(ClangToolPath)clang-offload-bundler" -type=o -bundle-align=4096 -targets=host-x86_64-pc-windows-msvc,hipv4-amdgcn-amd-amdhsa--gfx803,hipv4-amdgcn-amd-amdhsa--gfx900,hipv4-amdgcn-amd-amdhsa--gfx906,hipv4-amdgcn-amd-amdhsa--gfx908,hipv4-amdgcn-amd-amdhsa--gfx90a,hipv4-amdgcn-amd-amdhsa--gfx1030 -input=nul "-input=$(IntDir)main_gfx803.o" "-input=$(IntDir)main_gfx900.o" "-input=$(IntDir)main_gfx906.o" "-input=$(IntDir)main_gfx908.o" "-input=$(IntDir)main_gfx90a.o" "-input=$(IntDir)main_gfx1030.o" "-output=$(IntDir)offload_bundle.hipfb"
cd $(IntDir) &amp;&amp; "$(ClangToolPath)llvm-mc" -triple host-x86_64-pc-windows-msvc "hip_obj_gen_win.mcin" -o "main_device.obj" --filetype=obj</Command>
  <Message>Generating Device Offload Object</Message>
  <!-- File produced by the llvm-mc invocation above; the Link settings of
       this configuration list it in AdditionalDependencies. -->
  <Outputs>$(IntDir)main_device.obj</Outputs>
  <!-- Files the step depends on. The .mcin entry uses the same name as the
       CustomBuild item that copies hip_obj_gen_win.mcin into $(IntDir); the
       previous spelling (without the second underscore) named a file that is
       never created. -->
  <Inputs>$(IntDir)main_gfx803.o;$(IntDir)main_gfx900.o;$(IntDir)main_gfx906.o;$(IntDir)main_gfx908.o;$(IntDir)main_gfx90a.o;$(IntDir)main_gfx1030.o;$(IntDir)hip_obj_gen_win.mcin;%(Inputs)</Inputs>
</CustomBuildStep>
||||
</ItemDefinitionGroup> |
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<ClCompile> |
||||
<WarningLevel>Level2</WarningLevel> |
||||
<FunctionLevelLinking>true</FunctionLevelLinking> |
||||
<IntrinsicFunctions>true</IntrinsicFunctions> |
||||
<PreprocessorDefinitions>__HIP_ROCclr__;__clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions> |
||||
<LanguageStandard>stdcpp17</LanguageStandard> |
||||
<AdditionalIncludeDirectories>$(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> |
||||
</ClCompile> |
||||
<Link> |
||||
<SubSystem>Console</SubSystem> |
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding> |
||||
<OptimizeReferences>true</OptimizeReferences> |
||||
<GenerateDebugInformation>true</GenerateDebugInformation> |
||||
<AdditionalDependencies>$(IntDir)main_device.obj;%(AdditionalDependencies)</AdditionalDependencies> |
||||
</Link> |
||||
<CustomBuild> |
||||
<Message>Compiling Device Assembly %(Identity)</Message> |
||||
<Command>"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa</Command> |
||||
<Outputs>$(IntDir)%(FileName).o</Outputs> |
||||
</CustomBuild> |
||||
<CustomBuildStep>
  <!-- Release configuration: bundle the six per-architecture device objects
       into offload_bundle.hipfb with clang-offload-bundler (targets and
       inputs are listed in the same order; "nul" is the input given for the
       host target), then assemble hip_obj_gen_win.mcin into main_device.obj
       with llvm-mc from inside the intermediate directory.
       Every device target is spelled "hipv4-amdgcn-amd-amdhsa", two hyphens,
       then the gfx name, exactly as in the Debug configuration; the gfx90a
       entry previously had only one hyphen.
       The shell operator between "cd" and llvm-mc is written with XML
       entities because a raw ampersand is not valid inside an element. -->
  <Command>"$(ClangToolPath)clang-offload-bundler" -type=o -bundle-align=4096 -targets=host-x86_64-pc-windows-msvc,hipv4-amdgcn-amd-amdhsa--gfx803,hipv4-amdgcn-amd-amdhsa--gfx900,hipv4-amdgcn-amd-amdhsa--gfx906,hipv4-amdgcn-amd-amdhsa--gfx908,hipv4-amdgcn-amd-amdhsa--gfx90a,hipv4-amdgcn-amd-amdhsa--gfx1030 -input=nul "-input=$(IntDir)main_gfx803.o" "-input=$(IntDir)main_gfx900.o" "-input=$(IntDir)main_gfx906.o" "-input=$(IntDir)main_gfx908.o" "-input=$(IntDir)main_gfx90a.o" "-input=$(IntDir)main_gfx1030.o" "-output=$(IntDir)offload_bundle.hipfb"
cd $(IntDir) &amp;&amp; "$(ClangToolPath)llvm-mc" -triple host-x86_64-pc-windows-msvc "hip_obj_gen_win.mcin" -o "main_device.obj" --filetype=obj</Command>
  <Message>Generating Device Offload Object</Message>
  <!-- File produced by the llvm-mc invocation above; the Link settings of
       this configuration list it in AdditionalDependencies. -->
  <Outputs>$(IntDir)main_device.obj</Outputs>
  <!-- Files the step depends on. The .mcin entry uses the same name as the
       CustomBuild item that copies hip_obj_gen_win.mcin into $(IntDir); the
       previous spelling (without the second underscore) named a file that is
       never created. -->
  <Inputs>$(IntDir)main_gfx803.o;$(IntDir)main_gfx900.o;$(IntDir)main_gfx906.o;$(IntDir)main_gfx908.o;$(IntDir)main_gfx90a.o;$(IntDir)main_gfx1030.o;$(IntDir)hip_obj_gen_win.mcin;%(Inputs)</Inputs>
</CustomBuildStep>
||||
</ItemDefinitionGroup> |
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> |
||||
<ImportGroup Label="ExtensionTargets"> |
||||
<Import Condition="'$(HIPTargetsImported)' != 'true'" Project="$(VCTargetsPath)\AMD.HIP.Common.targets" /> |
||||
</ImportGroup> |
||||
</Project> |
||||
|
@ -1,102 +1,102 @@
@@ -1,102 +1,102 @@
|
||||
<?xml version="1.0" encoding="utf-8"?> |
||||
<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> |
||||
<ItemGroup Label="ProjectConfigurations"> |
||||
<ProjectConfiguration Include="Debug|x64"> |
||||
<Configuration>Debug</Configuration> |
||||
<Platform>x64</Platform> |
||||
</ProjectConfiguration> |
||||
<ProjectConfiguration Include="Release|x64"> |
||||
<Configuration>Release</Configuration> |
||||
<Platform>x64</Platform> |
||||
</ProjectConfiguration> |
||||
</ItemGroup> |
||||
<ItemGroup> |
||||
<ClCompile Include="main.hip" /> |
||||
</ItemGroup> |
||||
<ItemGroup> |
||||
<ClInclude Include="..\..\Common\cmdparser.hpp" /> |
||||
<ClInclude Include="..\..\Common\example_utils.hpp" /> |
||||
</ItemGroup> |
||||
<PropertyGroup Label="Globals"> |
||||
<VCProjectVersion>15.0</VCProjectVersion> |
||||
<ProjectGuid>{16b11b54-cd72-43b6-b226-38c668b41a79}</ProjectGuid> |
||||
<Keyword>Win32Proj</Keyword> |
||||
<RootNamespace>bandwidth_vs2019</RootNamespace> |
||||
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion> |
||||
</PropertyGroup> |
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration"> |
||||
<ConfigurationType>Application</ConfigurationType> |
||||
<UseDebugLibraries>true</UseDebugLibraries> |
||||
<PlatformToolset>HIP</PlatformToolset> |
||||
<CharacterSet>Unicode</CharacterSet> |
||||
</PropertyGroup> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration"> |
||||
<ConfigurationType>Application</ConfigurationType> |
||||
<UseDebugLibraries>false</UseDebugLibraries> |
||||
<PlatformToolset>HIP</PlatformToolset> |
||||
<WholeProgramOptimization>true</WholeProgramOptimization> |
||||
<CharacterSet>Unicode</CharacterSet> |
||||
</PropertyGroup> |
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> |
||||
<ImportGroup Label="ExtensionSettings"> |
||||
<Import Condition="'$(HIPPropertiesImported)' != 'true'" Project="$(VCTargetsPath)\AMD.HIP.Common.props" /> |
||||
</ImportGroup> |
||||
<ImportGroup Label="Shared"> |
||||
</ImportGroup> |
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> |
||||
</ImportGroup> |
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> |
||||
</ImportGroup> |
||||
<PropertyGroup Label="UserMacros" /> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<LinkIncremental>true</LinkIncremental> |
||||
<TargetName>hip_$(ProjectName)</TargetName> |
||||
</PropertyGroup> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<LinkIncremental>false</LinkIncremental> |
||||
<TargetName>hip_$(ProjectName)</TargetName> |
||||
</PropertyGroup> |
||||
<PropertyGroup Label="HIP" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<TargetGPUArchitectures>gfx1030</TargetGPUArchitectures> |
||||
</PropertyGroup> |
||||
<PropertyGroup Label="HIP" Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<TargetGPUArchitectures>gfx1030</TargetGPUArchitectures> |
||||
</PropertyGroup> |
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<ClCompile>
  <!-- Debug compile settings for the bandwidth example.
       The warning level is Level2, the same value the Release configuration
       of this project uses, so both configurations report the same set of
       diagnostics. -->
  <WarningLevel>Level2</WarningLevel>
  <PreprocessorDefinitions>__HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
  <LanguageStandard>stdcpp17</LanguageStandard>
  <!-- Makes the shared headers in the repository's Common directory
       (cmdparser.hpp, example_utils.hpp) available to main.hip. -->
  <AdditionalIncludeDirectories>$(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
  <RuntimeTypeInfo>true</RuntimeTypeInfo>
</ClCompile>
||||
<Link> |
||||
<SubSystem>Console</SubSystem> |
||||
<GenerateDebugInformation>true</GenerateDebugInformation> |
||||
</Link> |
||||
</ItemDefinitionGroup> |
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<ClCompile> |
||||
<WarningLevel>Level2</WarningLevel> |
||||
<FunctionLevelLinking>true</FunctionLevelLinking> |
||||
<IntrinsicFunctions>true</IntrinsicFunctions> |
||||
<PreprocessorDefinitions>__HIP_ROCclr__;__clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions> |
||||
<LanguageStandard>stdcpp17</LanguageStandard> |
||||
<AdditionalIncludeDirectories>$(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> |
||||
<RuntimeTypeInfo>true</RuntimeTypeInfo> |
||||
</ClCompile> |
||||
<Link> |
||||
<SubSystem>Console</SubSystem> |
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding> |
||||
<OptimizeReferences>true</OptimizeReferences> |
||||
<GenerateDebugInformation>true</GenerateDebugInformation> |
||||
</Link> |
||||
</ItemDefinitionGroup> |
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> |
||||
<ImportGroup Label="ExtensionTargets"> |
||||
<Import Condition="'$(HIPTargetsImported)' != 'true'" Project="$(VCTargetsPath)\AMD.HIP.Common.targets" /> |
||||
</ImportGroup> |
||||
</Project> |
||||
<?xml version="1.0" encoding="utf-8"?> |
||||
<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> |
||||
<ItemGroup Label="ProjectConfigurations"> |
||||
<ProjectConfiguration Include="Debug|x64"> |
||||
<Configuration>Debug</Configuration> |
||||
<Platform>x64</Platform> |
||||
</ProjectConfiguration> |
||||
<ProjectConfiguration Include="Release|x64"> |
||||
<Configuration>Release</Configuration> |
||||
<Platform>x64</Platform> |
||||
</ProjectConfiguration> |
||||
</ItemGroup> |
||||
<ItemGroup> |
||||
<ClCompile Include="main.hip" /> |
||||
</ItemGroup> |
||||
<ItemGroup> |
||||
<ClInclude Include="..\..\Common\cmdparser.hpp" /> |
||||
<ClInclude Include="..\..\Common\example_utils.hpp" /> |
||||
</ItemGroup> |
||||
<PropertyGroup Label="Globals"> |
||||
<VCProjectVersion>15.0</VCProjectVersion> |
||||
<ProjectGuid>{16b11b54-cd72-43b6-b226-38c668b41a79}</ProjectGuid> |
||||
<Keyword>Win32Proj</Keyword> |
||||
<RootNamespace>bandwidth_vs2019</RootNamespace> |
||||
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion> |
||||
</PropertyGroup> |
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration"> |
||||
<ConfigurationType>Application</ConfigurationType> |
||||
<UseDebugLibraries>true</UseDebugLibraries> |
||||
<PlatformToolset>HIP</PlatformToolset> |
||||
<CharacterSet>Unicode</CharacterSet> |
||||
</PropertyGroup> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration"> |
||||
<ConfigurationType>Application</ConfigurationType> |
||||
<UseDebugLibraries>false</UseDebugLibraries> |
||||
<PlatformToolset>HIP</PlatformToolset> |
||||
<WholeProgramOptimization>true</WholeProgramOptimization> |
||||
<CharacterSet>Unicode</CharacterSet> |
||||
</PropertyGroup> |
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> |
||||
<ImportGroup Label="ExtensionSettings"> |
||||
<Import Condition="'$(HIPPropertiesImported)' != 'true'" Project="$(VCTargetsPath)\AMD.HIP.Common.props" /> |
||||
</ImportGroup> |
||||
<ImportGroup Label="Shared"> |
||||
</ImportGroup> |
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> |
||||
</ImportGroup> |
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> |
||||
</ImportGroup> |
||||
<PropertyGroup Label="UserMacros" /> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<LinkIncremental>true</LinkIncremental> |
||||
<TargetName>hip_$(ProjectName)</TargetName> |
||||
</PropertyGroup> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<LinkIncremental>false</LinkIncremental> |
||||
<TargetName>hip_$(ProjectName)</TargetName> |
||||
</PropertyGroup> |
||||
<PropertyGroup Label="HIP" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<TargetGPUArchitectures>gfx1030</TargetGPUArchitectures> |
||||
</PropertyGroup> |
||||
<PropertyGroup Label="HIP" Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<TargetGPUArchitectures>gfx1030</TargetGPUArchitectures> |
||||
</PropertyGroup> |
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<ClCompile> |
||||
<WarningLevel>Level2</WarningLevel> |
||||
<PreprocessorDefinitions>__HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions> |
||||
<LanguageStandard>stdcpp17</LanguageStandard> |
||||
<AdditionalIncludeDirectories>$(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> |
||||
<RuntimeTypeInfo>true</RuntimeTypeInfo> |
||||
</ClCompile> |
||||
<Link> |
||||
<SubSystem>Console</SubSystem> |
||||
<GenerateDebugInformation>true</GenerateDebugInformation> |
||||
</Link> |
||||
</ItemDefinitionGroup> |
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<ClCompile> |
||||
<WarningLevel>Level2</WarningLevel> |
||||
<FunctionLevelLinking>true</FunctionLevelLinking> |
||||
<IntrinsicFunctions>true</IntrinsicFunctions> |
||||
<PreprocessorDefinitions>__HIP_ROCclr__;__clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions> |
||||
<LanguageStandard>stdcpp17</LanguageStandard> |
||||
<AdditionalIncludeDirectories>$(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> |
||||
<RuntimeTypeInfo>true</RuntimeTypeInfo> |
||||
</ClCompile> |
||||
<Link> |
||||
<SubSystem>Console</SubSystem> |
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding> |
||||
<OptimizeReferences>true</OptimizeReferences> |
||||
<GenerateDebugInformation>true</GenerateDebugInformation> |
||||
</Link> |
||||
</ItemDefinitionGroup> |
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> |
||||
<ImportGroup Label="ExtensionTargets"> |
||||
<Import Condition="'$(HIPTargetsImported)' != 'true'" Project="$(VCTargetsPath)\AMD.HIP.Common.targets" /> |
||||
</ImportGroup> |
||||
</Project> |
||||
|
@ -0,0 +1 @@
@@ -0,0 +1 @@
|
||||
hip_bit_extract |
@ -0,0 +1,57 @@
@@ -0,0 +1,57 @@
|
||||
# MIT License |
||||
# |
||||
# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. |
||||
# |
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
# of this software and associated documentation files (the "Software"), to deal |
||||
# in the Software without restriction, including without limitation the rights |
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||
# copies of the Software, and to permit persons to whom the Software is |
||||
# furnished to do so, subject to the following conditions: |
||||
# |
||||
# The above copyright notice and this permission notice shall be included in all |
||||
# copies or substantial portions of the Software. |
||||
# |
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
||||
# SOFTWARE. |
||||
|
||||
# cmake_minimum_required comes first so that policy defaults are in place
# before any other command runs.
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)

# Name shared by the project, the executable target and the CTest test.
set(example_name hip_bit_extract)
project(${example_name} LANGUAGES CXX)

# GPU_RUNTIME selects the language main.hip is compiled as. It is a cache
# entry, so it can be set with -DGPU_RUNTIME=... and is offered as a drop-down
# of the allowed values in the CMake GUIs.
set(GPU_RUNTIME "HIP" CACHE STRING "Switches between HIP and CUDA")
set(GPU_RUNTIMES "HIP" "CUDA")
set_property(CACHE GPU_RUNTIME PROPERTY STRINGS ${GPU_RUNTIMES})

# Reject any value that is not in the list above.
if(NOT "${GPU_RUNTIME}" IN_LIST GPU_RUNTIMES)
    message(
        FATAL_ERROR
        "GPU_RUNTIME is set to \"${GPU_RUNTIME}\".\nGPU_RUNTIME must be either HIP or CUDA."
    )
endif()

# Enable the selected language and require C++17 without compiler extensions.
enable_language(${GPU_RUNTIME})
set(CMAKE_${GPU_RUNTIME}_STANDARD 17)
set(CMAKE_${GPU_RUNTIME}_EXTENSIONS OFF)
set(CMAKE_${GPU_RUNTIME}_STANDARD_REQUIRED ON)

# Use the ROCm installation as the package search prefix unless the user has
# already provided CMAKE_PREFIX_PATH.
set(ROCM_ROOT "/opt/rocm" CACHE PATH "Root directory of the ROCm installation")
if(NOT CMAKE_PREFIX_PATH)
    set(CMAKE_PREFIX_PATH "${ROCM_ROOT}")
endif()

add_executable(${example_name} main.hip)

# Make example runnable using ctest. The NAME/COMMAND signature lets CMake
# replace the target name with the path of the built executable, which the
# legacy add_test(<name> <command>) signature does not do.
add_test(NAME ${example_name} COMMAND ${example_name})

# Shared example headers; when building as CUDA, the include directory of the
# ROCm installation is added as well.
set(include_dirs "${CMAKE_CURRENT_SOURCE_DIR}/../../Common")
if("${GPU_RUNTIME}" STREQUAL "CUDA")
    list(APPEND include_dirs "${ROCM_ROOT}/include")
endif()

target_include_directories(${example_name} PRIVATE ${include_dirs})

# main.hip is compiled with whichever language GPU_RUNTIME selected.
set_source_files_properties(main.hip PROPERTIES LANGUAGE ${GPU_RUNTIME})
@ -0,0 +1,60 @@
@@ -0,0 +1,60 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
# Name of the executable that is built, location of the shared example
# headers, and the runtime to build for. All three can be overridden on the
# make command line, e.g. `make GPU_RUNTIME=CUDA`.
EXAMPLE            := hip_bit_extract
COMMON_INCLUDE_DIR := ../../Common
GPU_RUNTIME        := HIP

# HIP variables
ROCM_INSTALL_DIR := /opt/rocm
HIP_INCLUDE_DIR  := $(ROCM_INSTALL_DIR)/include

# Compiler driver; only assigned here when HIPCXX is not already defined.
HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc

# Common variables and flags.
# The I-prefixed variables are this makefile's own flag sets; the user's
# CXXFLAGS/CPPFLAGS/LDFLAGS/LDLIBS are appended to them further down.
CXX_STD   := c++17
ICXXFLAGS := -std=$(CXX_STD)
ICPPFLAGS := -I $(COMMON_INCLUDE_DIR)
ILDFLAGS  :=
ILDLIBS   :=

# Runtime-specific flags. Any GPU_RUNTIME other than CUDA or HIP aborts the
# build with an error.
ifeq ($(GPU_RUNTIME), CUDA)
    ICXXFLAGS += -x cu
    ICPPFLAGS += -isystem $(HIP_INCLUDE_DIR)
else ifeq ($(GPU_RUNTIME), HIP)
    # Default warning flags, used only when CXXFLAGS is not already set.
    CXXFLAGS ?= -Wall -Wextra
else
    $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be either CUDA or HIP)
endif

# Append the user-provided flags after the built-in ones.
ICXXFLAGS += $(CXXFLAGS)
ICPPFLAGS += $(CPPFLAGS)
ILDFLAGS  += $(LDFLAGS)
ILDLIBS   += $(LDLIBS)

# Build the example from main.hip ($<); it is rebuilt when main.hip or the
# shared example_utils.hpp header changes.
$(EXAMPLE): main.hip $(COMMON_INCLUDE_DIR)/example_utils.hpp
	$(HIPCXX) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $< $(ILDLIBS)

# Remove the built executable.
clean:
	$(RM) $(EXAMPLE)

.PHONY: clean
@ -0,0 +1,33 @@
@@ -0,0 +1,33 @@
|
||||
# HIP-Basic Bit Extract Example |
||||
|
||||
## Description |
||||
A HIP-specific bit extract solution is presented in this example. |
||||
|
||||
### Application flow |
||||
1. Allocate memory for host vectors. |
||||
2. Fill the input host vector as an arithmetic sequence by the vector index. |
||||
3. Allocate memory for device arrays. |
||||
4. Copy the arithmetic sequence from the host to device memory. |
||||
5. Apply the bit extract operator to the sequence element by element and store the results in the result array. When compiling for HIP, the `__bitextract_u32()` device function is used; otherwise, standard bit shift operators are used. |
||||
6. Copy the result sequence from the device to the host memory. |
||||
7. Compare the result sequence to the expected sequence, element by element. If a mismatch is detected, the vector index and both values are printed, and the program exits with an error code. |
||||
8. Deallocate device and host memory. |
||||
9. "PASSED!" is printed when the flow was successful. |
||||
|
||||
## Key APIs and Concepts |
||||
- `hipLaunchKernelGGL(kernel_name, grid_dim, block_dim, dynamic_shared_memory_size, stream, <kernel arguments>)` is the HIP kernel launcher, where the grid and block dimensions, the dynamic shared memory size, and the HIP stream are specified. This example uses the NULL stream. |
||||
- `__bitextract_u32(source, bit_start, num_bits)` is the built-in AMD HIP bit extract operator. It takes a source scalar, the start bit `bit_start`, and the number of bits to extract `num_bits`, and returns a scalar value. |
||||
|
||||
## Demonstrated API Calls |
||||
### HIP runtime |
||||
#### Device symbols |
||||
- `threadIdx`, `blockIdx`, `blockDim`, `gridDim` |
||||
- `__bitextract_u32` |
||||
|
||||
#### Host symbols |
||||
- `hipMalloc` |
||||
- `hipFree` |
||||
- `hipMemcpy` |
||||
- `hipMemcpyHostToDevice` |
||||
- `hipMemcpyDeviceToHost` |
||||
- `hipLaunchKernelGGL` |
@ -0,0 +1,99 @@
@@ -0,0 +1,99 @@
|
||||
<?xml version="1.0" encoding="utf-8"?> |
||||
<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> |
||||
<ItemGroup Label="ProjectConfigurations"> |
||||
<ProjectConfiguration Include="Debug|x64"> |
||||
<Configuration>Debug</Configuration> |
||||
<Platform>x64</Platform> |
||||
</ProjectConfiguration> |
||||
<ProjectConfiguration Include="Release|x64"> |
||||
<Configuration>Release</Configuration> |
||||
<Platform>x64</Platform> |
||||
</ProjectConfiguration> |
||||
</ItemGroup> |
||||
<ItemGroup> |
||||
<ClCompile Include="main.hip" /> |
||||
</ItemGroup> |
||||
<ItemGroup> |
||||
<ClInclude Include="..\..\Common\example_utils.hpp" /> |
||||
</ItemGroup> |
||||
<PropertyGroup Label="Globals"> |
||||
<VCProjectVersion>15.0</VCProjectVersion> |
||||
<ProjectGuid>{63823DD0-787C-42AE-B6E7-C03CF4CF5CE2}</ProjectGuid> |
||||
<Keyword>Win32Proj</Keyword> |
||||
<RootNamespace>bit_extract_vs2019</RootNamespace> |
||||
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion> |
||||
</PropertyGroup> |
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration"> |
||||
<ConfigurationType>Application</ConfigurationType> |
||||
<UseDebugLibraries>true</UseDebugLibraries> |
||||
<PlatformToolset>HIP</PlatformToolset> |
||||
<CharacterSet>Unicode</CharacterSet> |
||||
</PropertyGroup> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration"> |
||||
<ConfigurationType>Application</ConfigurationType> |
||||
<UseDebugLibraries>false</UseDebugLibraries> |
||||
<PlatformToolset>HIP</PlatformToolset> |
||||
<WholeProgramOptimization>true</WholeProgramOptimization> |
||||
<CharacterSet>Unicode</CharacterSet> |
||||
</PropertyGroup> |
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> |
||||
<ImportGroup Label="ExtensionSettings"> |
||||
<Import Condition="'$(HIPPropertiesImported)' != 'true'" Project="$(VCTargetsPath)\AMD.HIP.Common.props" /> |
||||
</ImportGroup> |
||||
<ImportGroup Label="Shared"> |
||||
</ImportGroup> |
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> |
||||
</ImportGroup> |
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> |
||||
</ImportGroup> |
||||
<PropertyGroup Label="UserMacros" /> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<LinkIncremental>true</LinkIncremental> |
||||
<TargetName>hip_$(ProjectName)</TargetName> |
||||
</PropertyGroup> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<LinkIncremental>false</LinkIncremental> |
||||
<TargetName>hip_$(ProjectName)</TargetName> |
||||
</PropertyGroup> |
||||
<PropertyGroup Label="HIP" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<TargetGPUArchitectures>gfx1030</TargetGPUArchitectures> |
||||
</PropertyGroup> |
||||
<PropertyGroup Label="HIP" Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<TargetGPUArchitectures>gfx1030</TargetGPUArchitectures> |
||||
</PropertyGroup> |
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<ClCompile> |
||||
<WarningLevel>Level2</WarningLevel> |
||||
<PreprocessorDefinitions>__HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions> |
||||
<LanguageStandard>stdcpp17</LanguageStandard> |
||||
<AdditionalIncludeDirectories>$(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> |
||||
</ClCompile> |
||||
<Link> |
||||
<SubSystem>Console</SubSystem> |
||||
<GenerateDebugInformation>true</GenerateDebugInformation> |
||||
</Link> |
||||
</ItemDefinitionGroup> |
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<ClCompile> |
||||
<WarningLevel>Level2</WarningLevel> |
||||
<FunctionLevelLinking>true</FunctionLevelLinking> |
||||
<IntrinsicFunctions>true</IntrinsicFunctions> |
||||
<PreprocessorDefinitions>__HIP_ROCclr__;__clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions> |
||||
<LanguageStandard>stdcpp17</LanguageStandard> |
||||
<AdditionalIncludeDirectories>$(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> |
||||
</ClCompile> |
||||
<Link> |
||||
<SubSystem>Console</SubSystem> |
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding> |
||||
<OptimizeReferences>true</OptimizeReferences> |
||||
<GenerateDebugInformation>true</GenerateDebugInformation> |
||||
</Link> |
||||
</ItemDefinitionGroup> |
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> |
||||
<ImportGroup Label="ExtensionTargets"> |
||||
<Import Condition="'$(HIPTargetsImported)' != 'true'" Project="$(VCTargetsPath)\AMD.HIP.Common.targets" /> |
||||
</ImportGroup> |
||||
</Project> |
@ -0,0 +1,111 @@
@@ -0,0 +1,111 @@
|
||||
// MIT License |
||||
// |
||||
// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. |
||||
// |
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
// of this software and associated documentation files (the "Software"), to deal |
||||
// in the Software without restriction, including without limitation the rights |
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||
// copies of the Software, and to permit persons to whom the Software is |
||||
// furnished to do so, subject to the following conditions: |
||||
// |
||||
// The above copyright notice and this permission notice shall be included in all |
||||
// copies or substantial portions of the Software. |
||||
// |
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
||||
// SOFTWARE. |
||||
|
||||
#include "example_utils.hpp" |
||||
|
||||
#include <hip/hip_runtime.h> |
||||
|
||||
#include <iostream> |
||||
#include <vector> |
||||
|
||||
/// \brief A simple bit extract kernel for unsigned 32-bit integer that returns an 8-bit extracted array. |
||||
/// - If the code was compiled on AMD HIP platform, the __bitextract_u32() built-in function is used. |
||||
/// - Otherwise default C++ extract operator is used. |
||||
|
||||
__global__ void bit_extract_kernel(uint32_t* d_output, const uint32_t* d_input, size_t size) |
||||
{ |
||||
const size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); |
||||
const size_t stride = blockDim.x * gridDim.x; |
||||
|
||||
for(size_t i = offset; i < size; i += stride) |
||||
{ |
||||
#ifdef __HIP_PLATFORM_AMD__ |
||||
d_output[i] = __bitextract_u32(d_input[i], 8, 4); |
||||
#else /* defined __HIP_PLATFORM_NVIDIA__ or other path */ |
||||
d_output[i] = ((d_input[i] & 0xf00) >> 8); |
||||
#endif |
||||
} |
||||
} |
||||
|
||||
int main() |
||||
{ |
||||
constexpr size_t size = 1000000; |
||||
constexpr size_t size_in_bytes = size * sizeof(uint32_t); |
||||
|
||||
// Allocate host vectors |
||||
std::vector<uint32_t> h_input(size); |
||||
std::vector<uint32_t> h_output(size); |
||||
|
||||
// Set up input data |
||||
for(size_t i = 0; i < size; i++) |
||||
{ |
||||
h_input[i] = i; |
||||
} |
||||
|
||||
// Allocate device memory for the input and output data |
||||
uint32_t *d_input, *d_output; |
||||
HIP_CHECK(hipMalloc(&d_input, size_in_bytes)); |
||||
HIP_CHECK(hipMalloc(&d_output, size_in_bytes)); |
||||
|
||||
// Copy data from host to device |
||||
HIP_CHECK(hipMemcpy(d_input, h_input.data(), size_in_bytes, hipMemcpyHostToDevice)); |
||||
|
||||
// Launch bit_extract_kernel() |
||||
constexpr unsigned int number_of_blocks = 512; |
||||
constexpr unsigned int threads_per_block = 256; |
||||
hipLaunchKernelGGL(bit_extract_kernel, |
||||
dim3(number_of_blocks), |
||||
dim3(threads_per_block), |
||||
0, |
||||
hipStreamDefault, |
||||
d_output, |
||||
d_input, |
||||
size); |
||||
|
||||
// Copy data from device to host |
||||
HIP_CHECK(hipMemcpy(h_output.data(), d_output, size_in_bytes, hipMemcpyDeviceToHost)); |
||||
|
||||
// Free device memory |
||||
HIP_CHECK(hipFree(d_input)); |
||||
HIP_CHECK(hipFree(d_output)); |
||||
|
||||
// Check result validity |
||||
unsigned int errors{}; |
||||
for(size_t i = 0; i < size; i++) |
||||
{ |
||||
uint32_t reference_value = ((h_input[i] & 0xf00) >> 8); |
||||
if(h_output[i] != reference_value) |
||||
{ |
||||
errors++; |
||||
} |
||||
} |
||||
|
||||
if(errors != 0) |
||||
{ |
||||
std::cout << "Validation failed. Errors: " << errors << std::endl; |
||||
return error_exit_code; |
||||
} |
||||
else |
||||
{ |
||||
std::cout << "Validation passed." << std::endl; |
||||
} |
||||
} |
@ -0,0 +1 @@
@@ -0,0 +1 @@
|
||||
hip_cooperative_groups |
@ -0,0 +1,57 @@
@@ -0,0 +1,57 @@
|
||||
# MIT License |
||||
# |
||||
# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. |
||||
# |
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
# of this software and associated documentation files (the "Software"), to deal |
||||
# in the Software without restriction, including without limitation the rights |
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||
# copies of the Software, and to permit persons to whom the Software is |
||||
# furnished to do so, subject to the following conditions: |
||||
# |
||||
# The above copyright notice and this permission notice shall be included in all |
||||
# copies or substantial portions of the Software. |
||||
# |
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
||||
# SOFTWARE. |
||||
|
||||
# Standalone build description of the hip_cooperative_groups example.
# cmake_minimum_required() comes first so that policy defaults are in place before any
# other command runs.
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)

set(example_name hip_cooperative_groups)
project(${example_name} LANGUAGES CXX)

# GPU_RUNTIME selects the language main.hip is compiled as. It is a cache entry, so it can be
# chosen on the command line (-DGPU_RUNTIME=CUDA) or from the allowed values in a CMake GUI.
set(GPU_RUNTIME "HIP" CACHE STRING "Switches between HIP and CUDA")
set(GPU_RUNTIMES "HIP" "CUDA")
set_property(CACHE GPU_RUNTIME PROPERTY STRINGS ${GPU_RUNTIMES})

if(NOT "${GPU_RUNTIME}" IN_LIST GPU_RUNTIMES)
    set(ERROR_MESSAGE "GPU_RUNTIME is set to \"${GPU_RUNTIME}\".\nGPU_RUNTIME must be either HIP or CUDA.")
    # Quoted so the text always reaches message() as a single argument
    message(FATAL_ERROR "${ERROR_MESSAGE}")
endif()

enable_language(${GPU_RUNTIME})
set(CMAKE_${GPU_RUNTIME}_STANDARD 17)
set(CMAKE_${GPU_RUNTIME}_EXTENSIONS OFF)
set(CMAKE_${GPU_RUNTIME}_STANDARD_REQUIRED ON)

# ROCM_ROOT is used as the package search prefix unless the user supplied CMAKE_PREFIX_PATH
set(ROCM_ROOT "/opt/rocm" CACHE PATH "Root directory of the ROCm installation")
if(NOT CMAKE_PREFIX_PATH)
    set(CMAKE_PREFIX_PATH "${ROCM_ROOT}")
endif()

add_executable(${example_name} main.hip)
# Make example runnable using ctest. The NAME/COMMAND signature resolves the target name to
# the location of the built executable; the short add_test(name command) form does not.
# The test is only generated when testing is enabled (enable_testing() or include(CTest))
# by this directory or one of its parents.
add_test(NAME ${example_name} COMMAND ${example_name})

# Headers shared between the examples; anchored to this directory instead of a bare relative path
set(include_dirs "${CMAKE_CURRENT_SOURCE_DIR}/../../Common")
# The CUDA build additionally adds the ROCm include directory
if("${GPU_RUNTIME}" STREQUAL "CUDA")
    list(APPEND include_dirs "${ROCM_ROOT}/include")
endif()

target_include_directories(${example_name} PRIVATE ${include_dirs})
# main.hip has no extension CMake maps to CUDA, so the language is set explicitly
set_source_files_properties(main.hip PROPERTIES LANGUAGE ${GPU_RUNTIME})
@ -0,0 +1,60 @@
@@ -0,0 +1,60 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
# Name of the produced executable, location of the shared example headers and the
# runtime to build for (HIP or CUDA).
EXAMPLE            := hip_cooperative_groups
COMMON_INCLUDE_DIR := ../../Common
GPU_RUNTIME        := HIP

# HIP variables
ROCM_INSTALL_DIR := /opt/rocm
HIP_INCLUDE_DIR  := $(ROCM_INSTALL_DIR)/include

# Compiler driver; only set when HIPCXX is not already defined
HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc

# Common variables and flags.
# The I-prefixed variables are the internal flag sets: they start with the flags this
# example always needs and receive the user-provided CXXFLAGS, CPPFLAGS, LDFLAGS and
# LDLIBS further below.
CXX_STD   := c++17
ICXXFLAGS := -std=$(CXX_STD)
ICPPFLAGS := -I $(COMMON_INCLUDE_DIR)
ILDFLAGS  :=
ILDLIBS   :=

ifeq ($(GPU_RUNTIME), CUDA)
    # Compile main.hip as CUDA source and add the HIP include directory as a system include path
    ICXXFLAGS += -x cu
    ICPPFLAGS += -isystem $(HIP_INCLUDE_DIR)
else ifeq ($(GPU_RUNTIME), HIP)
    # Default warning flags, used only when CXXFLAGS is not already defined
    CXXFLAGS ?= -Wall -Wextra
else
    $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be either CUDA or HIP)
endif

# Append the user-provided flags after the internal ones
ICXXFLAGS += $(CXXFLAGS)
ICPPFLAGS += $(CPPFLAGS)
ILDFLAGS  += $(LDFLAGS)
ILDLIBS   += $(LDLIBS)

# Build the example; it is rebuilt when main.hip or the shared utility header changes
$(EXAMPLE): main.hip $(COMMON_INCLUDE_DIR)/example_utils.hpp
	$(HIPCXX) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $< $(ILDLIBS)

.PHONY: clean

# Remove the built executable
clean:
	$(RM) $(EXAMPLE)
@ -0,0 +1,41 @@
@@ -0,0 +1,41 @@
|
||||
# HIP-Basic Cooperative Groups Example |
||||
|
||||
## Description |
||||
This program showcases the usage of Cooperative Groups inside a reduction kernel. |
||||
|
||||
Cooperative groups can be used to gain more control over synchronization. |
||||
|
||||
For more insights, you can read the following blog post: |
||||
[Cooperative Groups: Flexible CUDA Thread Programming](https://developer.nvidia.com/blog/cooperative-groups/) |
||||
|
||||
### Application flow |
||||
1. A number of variables are defined to control the problem details and the kernel launch parameters. |
||||
2. Input vector is set up in host memory. |
||||
3. The input is copied to the device. |
||||
4. The GPU reduction kernel is launched with previously defined arguments. |
||||
5. The kernel will perform two reductions: a reduction of the whole threadblock and a reduction of custom partitions. |
||||
6. The result vectors are copied back to the host and all device memory is freed. |
||||
7. The elements of the result vectors are compared with the expected result. The result of the comparison is printed to the standard output. |
||||
|
||||
## Key APIs and Concepts |
||||
Usually, programmers can only synchronize on warp-level or block-level. |
||||
Cooperative groups, however, allow the programmer to partition threads into groups and subsequently synchronize those groups. |
||||
The partitioned threads can reside across multiple devices. |
||||
|
||||
## Demonstrated API Calls |
||||
### HIP runtime |
||||
#### Device symbols |
||||
- `thread_group` |
||||
- `thread_block` |
||||
- `tiled_partition<size>()` |
||||
- `thread_block_tile` |
||||
- All above from the [`cooperative_groups` namespace](https://github.com/ROCm-Developer-Tools/hipamd/blob/develop/include/hip/amd_detail/amd_hip_cooperative_groups.h) |
||||
#### Host symbols |
||||
- `hipMalloc` |
||||
- `hipMemcpy` |
||||
- `hipLaunchCooperativeKernel` |
||||
- `hipDeviceAttributeCooperativeLaunch` |
||||
- `hipDeviceGetAttribute` |
||||
- `HIP_KERNEL_NAME` |
||||
- `hipGetLastError` |
||||
- `hipFree` |
@ -0,0 +1,99 @@
@@ -0,0 +1,99 @@
|
||||
<?xml version="1.0" encoding="utf-8"?> |
||||
<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> |
||||
<ItemGroup Label="ProjectConfigurations"> |
||||
<ProjectConfiguration Include="Debug|x64"> |
||||
<Configuration>Debug</Configuration> |
||||
<Platform>x64</Platform> |
||||
</ProjectConfiguration> |
||||
<ProjectConfiguration Include="Release|x64"> |
||||
<Configuration>Release</Configuration> |
||||
<Platform>x64</Platform> |
||||
</ProjectConfiguration> |
||||
</ItemGroup> |
||||
<ItemGroup> |
||||
<ClCompile Include="main.hip" /> |
||||
</ItemGroup> |
||||
<ItemGroup> |
||||
<ClInclude Include="..\..\Common\example_utils.hpp" /> |
||||
</ItemGroup> |
||||
<PropertyGroup Label="Globals"> |
||||
<VCProjectVersion>15.0</VCProjectVersion> |
||||
<ProjectGuid>{7a25ce69-bace-4410-beb0-12a69890f212}</ProjectGuid> |
||||
<Keyword>Win32Proj</Keyword> |
||||
<RootNamespace>cooperative_groups_vs2019</RootNamespace> |
||||
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion> |
||||
</PropertyGroup> |
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration"> |
||||
<ConfigurationType>Application</ConfigurationType> |
||||
<UseDebugLibraries>true</UseDebugLibraries> |
||||
<PlatformToolset>HIP</PlatformToolset> |
||||
<CharacterSet>Unicode</CharacterSet> |
||||
</PropertyGroup> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration"> |
||||
<ConfigurationType>Application</ConfigurationType> |
||||
<UseDebugLibraries>false</UseDebugLibraries> |
||||
<PlatformToolset>HIP</PlatformToolset> |
||||
<WholeProgramOptimization>true</WholeProgramOptimization> |
||||
<CharacterSet>Unicode</CharacterSet> |
||||
</PropertyGroup> |
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> |
||||
<ImportGroup Label="ExtensionSettings"> |
||||
<Import Condition="'$(HIPPropertiesImported)' != 'true'" Project="$(VCTargetsPath)\AMD.HIP.Common.props" /> |
||||
</ImportGroup> |
||||
<ImportGroup Label="Shared"> |
||||
</ImportGroup> |
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> |
||||
</ImportGroup> |
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> |
||||
</ImportGroup> |
||||
<PropertyGroup Label="UserMacros" /> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<LinkIncremental>true</LinkIncremental> |
||||
<TargetName>hip_$(ProjectName)</TargetName> |
||||
</PropertyGroup> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<LinkIncremental>false</LinkIncremental> |
||||
<TargetName>hip_$(ProjectName)</TargetName> |
||||
</PropertyGroup> |
||||
<PropertyGroup Label="HIP" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<TargetGPUArchitectures>gfx1030</TargetGPUArchitectures> |
||||
</PropertyGroup> |
||||
<PropertyGroup Label="HIP" Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<TargetGPUArchitectures>gfx1030</TargetGPUArchitectures> |
||||
</PropertyGroup> |
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<ClCompile> |
||||
<WarningLevel>Level2</WarningLevel> |
||||
<PreprocessorDefinitions>__HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions> |
||||
<LanguageStandard>stdcpp17</LanguageStandard> |
||||
<AdditionalIncludeDirectories>$(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> |
||||
</ClCompile> |
||||
<Link> |
||||
<SubSystem>Console</SubSystem> |
||||
<GenerateDebugInformation>true</GenerateDebugInformation> |
||||
</Link> |
||||
</ItemDefinitionGroup> |
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<ClCompile> |
||||
<WarningLevel>Level2</WarningLevel> |
||||
<FunctionLevelLinking>true</FunctionLevelLinking> |
||||
<IntrinsicFunctions>true</IntrinsicFunctions> |
||||
<PreprocessorDefinitions>__HIP_ROCclr__;__clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions> |
||||
<LanguageStandard>stdcpp17</LanguageStandard> |
||||
<AdditionalIncludeDirectories>$(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> |
||||
</ClCompile> |
||||
<Link> |
||||
<SubSystem>Console</SubSystem> |
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding> |
||||
<OptimizeReferences>true</OptimizeReferences> |
||||
<GenerateDebugInformation>true</GenerateDebugInformation> |
||||
</Link> |
||||
</ItemDefinitionGroup> |
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> |
||||
<ImportGroup Label="ExtensionTargets"> |
||||
<Import Condition="'$(HIPTargetsImported)' != 'true'" Project="$(VCTargetsPath)\AMD.HIP.Common.targets" /> |
||||
</ImportGroup> |
||||
</Project> |
@ -0,0 +1,249 @@
@@ -0,0 +1,249 @@
|
||||
// MIT License |
||||
// |
||||
// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. |
||||
// |
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
// of this software and associated documentation files (the "Software"), to deal |
||||
// in the Software without restriction, including without limitation the rights |
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||
// copies of the Software, and to permit persons to whom the Software is |
||||
// furnished to do so, subject to the following conditions: |
||||
// |
||||
// The above copyright notice and this permission notice shall be included in all |
||||
// copies or substantial portions of the Software. |
||||
// |
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
||||
// SOFTWARE. |
||||
|
||||
#include "example_utils.hpp" |
||||
|
||||
#include <hip/hip_cooperative_groups.h> |
||||
#include <hip/hip_runtime.h> |
||||
|
||||
#include <iostream> |
||||
#include <vector> |
||||
|
||||
#include <cstddef> |
||||
#include <cstdlib> |
||||
|
||||
using namespace cooperative_groups; |
||||
|
||||
/// \brief Summation of `unsigned int val`'s in `thread_group g` using shared memory `x` |
||||
__device__ unsigned int reduce_sum(thread_group g, unsigned int* x, unsigned int val) |
||||
{ |
||||
// Rank of this thread in the group |
||||
const unsigned int group_thread_id = g.thread_rank(); |
||||
|
||||
// We start with half the group size as active threads |
||||
// Every iteration the number of active threads halves, until we processed all values |
||||
for(unsigned int i = g.size() / 2; i > 0; i /= 2) |
||||
{ |
||||
// Store value for this thread in a shared, temporary array |
||||
x[group_thread_id] = val; |
||||
|
||||
// Synchronize all threads in the group |
||||
g.sync(); |
||||
|
||||
// If our thread is still active, sum with its counterpart in the other half |
||||
if(group_thread_id < i) |
||||
{ |
||||
val += x[group_thread_id + i]; |
||||
} |
||||
|
||||
// Synchronize all threads in the group |
||||
g.sync(); |
||||
} |
||||
|
||||
// Only the first thread returns a valid value |
||||
if(g.thread_rank() == 0) |
||||
return val; |
||||
else |
||||
return 0; |
||||
} |
||||
|
||||
/// \brief A vector reduction kernel showcasing the use of cooperative groups. |
||||
/// - First we showcase the use of threadBlockGroup. |
||||
/// - Second we showcase the use of `tiled_partition<>()`. |
||||
/// \param partition_size The number of elements in a cooperative group's tiled_partition. |
||||
template<unsigned int PartitionSize> |
||||
__global__ void vector_reduce_kernel(const unsigned int* d_vector, |
||||
unsigned int* d_block_reduced_vector, |
||||
unsigned int* d_partition_reduced_vector) |
||||
{ |
||||
// threadBlockGroup consists of all threads in the block |
||||
thread_block thread_block_group = this_thread_block(); |
||||
|
||||
// Workspace array in shared memory required for reduction |
||||
__shared__ unsigned int workspace[2048]; |
||||
|
||||
unsigned int output; |
||||
|
||||
// Input to reduce |
||||
const unsigned int input = d_vector[thread_block_group.thread_rank()]; |
||||
|
||||
// Perform reduction |
||||
output = reduce_sum(thread_block_group, workspace, input); |
||||
|
||||
// Only the first thread returns a valid value |
||||
if(thread_block_group.thread_rank() == 0) |
||||
{ |
||||
d_block_reduced_vector[0] = output; |
||||
} |
||||
|
||||
// Every custom_partition group consists of 16 threads |
||||
thread_block_tile<PartitionSize> custom_partition |
||||
= tiled_partition<PartitionSize>(thread_block_group); |
||||
|
||||
// To make sure every partition has its own piece of shared memory it can work with |
||||
const unsigned int group_offset |
||||
= thread_block_group.thread_rank() - custom_partition.thread_rank(); |
||||
|
||||
// Perform reduction |
||||
output = reduce_sum(custom_partition, &workspace[group_offset], input); |
||||
|
||||
// Only the first thread in each partition returns a valid value |
||||
if(custom_partition.thread_rank() == 0) |
||||
{ |
||||
const unsigned int partition_id = thread_block_group.thread_rank() / PartitionSize; |
||||
d_partition_reduced_vector[partition_id] = output; |
||||
} |
||||
return; |
||||
} |
||||
|
||||
// Host-side reference for the reductions executed on the GPU: sums every run of
// `partition_size` consecutive elements of `input` and returns one sum per complete run.
// Elements that do not fill a complete run at the end of `input` are ignored.
std::vector<unsigned int> ref_reduced(const unsigned int        partition_size,
                                      std::vector<unsigned int> input)
{
    // Number of complete partitions that fit in the input
    const unsigned int partition_count
        = static_cast<unsigned int>(input.size()) / partition_size;

    std::vector<unsigned int> sums;
    sums.reserve(partition_count);

    // Walk the input once, front to back, closing a partition every partition_size elements
    auto cursor = input.cbegin();
    for(unsigned int partition = 0; partition < partition_count; ++partition)
    {
        unsigned int sum = 0;
        for(unsigned int k = 0; k < partition_size; ++k, ++cursor)
        {
            sum += *cursor;
        }
        sums.push_back(sum);
    }

    return sums;
}
||||
|
||||
int main() |
||||
{ |
||||
#ifdef __HIP_PLATFORM_AMD__ |
||||
int device = 0; |
||||
int supports_coop_launch = 0; |
||||
// Check support |
||||
// Use hipDeviceAttributeCooperativeMultiDeviceLaunch when launching across multiple devices |
||||
HIP_CHECK(hipGetDevice(&device)); |
||||
HIP_CHECK( |
||||
hipDeviceGetAttribute(&supports_coop_launch, hipDeviceAttributeCooperativeLaunch, device)); |
||||
if(!supports_coop_launch) |
||||
{ |
||||
std::cout << "Skipping, device " << device << " does not support cooperative groups" |
||||
<< std::endl; |
||||
return 0; |
||||
} |
||||
#endif |
||||
|
||||
// Number of blocks to launch. |
||||
constexpr unsigned int num_blocks = 1; |
||||
|
||||
// Number of threads in each kernel block. |
||||
constexpr unsigned int threads_per_block = 64; |
||||
|
||||
// Total element count of the input vector. |
||||
constexpr unsigned int size = num_blocks * threads_per_block; |
||||
|
||||
// Total elements count of a tiled_partition. |
||||
constexpr unsigned int partition_size = 16; |
||||
|
||||
// Total size (in bytes) of the input vector. |
||||
constexpr size_t size_bytes = sizeof(unsigned int) * size; |
||||
|
||||
static_assert(threads_per_block % partition_size == 0, |
||||
"threads_per_block must be a multiple of partition_size"); |
||||
|
||||
// Allocate host vectors. |
||||
std::vector<unsigned int> h_vector(size); |
||||
std::vector<unsigned int> h_block_reduced(num_blocks); |
||||
std::vector<unsigned int> h_partition_reduced(threads_per_block / partition_size); |
||||
|
||||
// Set up input data. |
||||
for(unsigned int i = 0; i < size; i++) |
||||
{ |
||||
h_vector[i] = i; |
||||
} |
||||
|
||||
// Allocate device memory for the input and output matrices. |
||||
unsigned int* d_vector{}; |
||||
unsigned int* d_block_reduced{}; |
||||
unsigned int* d_partition_reduced{}; |
||||
HIP_CHECK(hipMalloc(&d_vector, size_bytes)); |
||||
HIP_CHECK(hipMalloc(&d_block_reduced, sizeof(unsigned int) * h_block_reduced.size())); |
||||
HIP_CHECK(hipMalloc(&d_partition_reduced, sizeof(unsigned int) * h_partition_reduced.size())); |
||||
|
||||
// Transfer the input vector to the device memory. |
||||
HIP_CHECK(hipMemcpy(d_vector, h_vector.data(), size_bytes, hipMemcpyHostToDevice)); |
||||
|
||||
void* params[] = {&d_vector, &d_block_reduced, &d_partition_reduced}; |
||||
// Launching kernel from host. |
||||
HIP_CHECK(hipLaunchCooperativeKernel(vector_reduce_kernel<partition_size>, |
||||
dim3(num_blocks), |
||||
dim3(threads_per_block), |
||||
params, |
||||
0, |
||||
hipStreamDefault)); |
||||
|
||||
// Check if the kernel launch was successful. |
||||
HIP_CHECK(hipGetLastError()); |
||||
|
||||
// Transfer the result back to the host. |
||||
HIP_CHECK(hipMemcpy(h_block_reduced.data(), |
||||
d_block_reduced, |
||||
sizeof(unsigned int) * h_block_reduced.size(), |
||||
hipMemcpyDeviceToHost)); |
||||
|
||||
HIP_CHECK(hipMemcpy(h_partition_reduced.data(), |
||||
d_partition_reduced, |
||||
sizeof(unsigned int) * h_partition_reduced.size(), |
||||
hipMemcpyDeviceToHost)); |
||||
|
||||
// Free the resources on the device. |
||||
HIP_CHECK(hipFree(d_vector)); |
||||
HIP_CHECK(hipFree(d_block_reduced)); |
||||
HIP_CHECK(hipFree(d_partition_reduced)); |
||||
|
||||
// Perform the reference (CPU) calculation. |
||||
std::vector<unsigned int> ref_block_reduced = ref_reduced(threads_per_block, h_vector); |
||||
std::vector<unsigned int> ref_partition_reduced = ref_reduced(partition_size, h_vector); |
||||
|
||||
// Check the results' validity. |
||||
unsigned int errors{}; |
||||
for(unsigned int i = 0; i < h_block_reduced.size(); i++) |
||||
{ |
||||
errors += (h_block_reduced[i] != ref_block_reduced[i]); |
||||
} |
||||
for(unsigned int i = 0; i < h_partition_reduced.size(); i++) |
||||
{ |
||||
errors += (h_partition_reduced[i] != ref_partition_reduced[i]); |
||||
} |
||||
|
||||
if(errors) |
||||
{ |
||||
std::cout << "Validation failed. Errors: " << errors << std::endl; |
||||
return error_exit_code; |
||||
} |
||||
else |
||||
{ |
||||
std::cout << "Validation passed." << std::endl; |
||||
} |
||||
} |
@ -0,0 +1 @@
@@ -0,0 +1 @@
|
||||
hip_device_globals |
@ -0,0 +1,57 @@
@@ -0,0 +1,57 @@
|
||||
# MIT License |
||||
# |
||||
# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. |
||||
# |
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
# of this software and associated documentation files (the "Software"), to deal |
||||
# in the Software without restriction, including without limitation the rights |
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||
# copies of the Software, and to permit persons to whom the Software is |
||||
# furnished to do so, subject to the following conditions: |
||||
# |
||||
# The above copyright notice and this permission notice shall be included in all |
||||
# copies or substantial portions of the Software. |
||||
# |
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
||||
# SOFTWARE. |
||||
|
||||
# Build script for the HIP-Basic device globals example.
# The example can be compiled either with the HIP or with the CUDA language support of
# CMake; the choice is made through the GPU_RUNTIME cache variable.
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)

# Use the same "hip_" prefixed name as the Makefile build and the ignore file, so that
# every build system produces an identically named executable.
set(example_name hip_device_globals)
project(${example_name} LANGUAGES CXX)

set(GPU_RUNTIME "HIP" CACHE STRING "Switches between HIP and CUDA")
set(GPU_RUNTIMES "HIP" "CUDA")
set_property(CACHE GPU_RUNTIME PROPERTY STRINGS ${GPU_RUNTIMES})

if(NOT "${GPU_RUNTIME}" IN_LIST GPU_RUNTIMES)
    set(ERROR_MESSAGE "GPU_RUNTIME is set to \"${GPU_RUNTIME}\".\nGPU_RUNTIME must be either HIP or CUDA.")
    message(FATAL_ERROR "${ERROR_MESSAGE}")
endif()

# Enable the selected GPU language and require C++17 without compiler extensions.
enable_language(${GPU_RUNTIME})
set(CMAKE_${GPU_RUNTIME}_STANDARD 17)
set(CMAKE_${GPU_RUNTIME}_EXTENSIONS OFF)
set(CMAKE_${GPU_RUNTIME}_STANDARD_REQUIRED ON)

# Only fall back to the default ROCm location when the user did not provide a prefix path.
set(ROCM_ROOT "/opt/rocm" CACHE PATH "Root directory of the ROCm installation")
if(NOT CMAKE_PREFIX_PATH)
    set(CMAKE_PREFIX_PATH "${ROCM_ROOT}")
endif()

add_executable(${example_name} main.hip)

# Make example runnable using ctest. The NAME/COMMAND signature resolves the target name
# to the location of the built executable, which the legacy signature does not do.
add_test(NAME ${example_name} COMMAND ${example_name})

# The shared example headers are always needed; the HIP headers from the ROCm
# installation are added explicitly when compiling with the CUDA language.
set(include_dirs "${CMAKE_CURRENT_SOURCE_DIR}/../../Common")
if("${GPU_RUNTIME}" STREQUAL "CUDA")
    list(APPEND include_dirs "${ROCM_ROOT}/include")
endif()

target_include_directories(${example_name} PRIVATE ${include_dirs})

# Compile the .hip source with the selected GPU language.
set_source_files_properties(main.hip PROPERTIES LANGUAGE ${GPU_RUNTIME})
@ -0,0 +1,60 @@
@@ -0,0 +1,60 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
# Name of the executable, location of the shared example headers, and the GPU runtime
# the example is built for.
EXAMPLE            := hip_device_globals
COMMON_INCLUDE_DIR := ../../Common
GPU_RUNTIME        := HIP

# ROCm installation and the HIP headers inside it.
ROCM_INSTALL_DIR := /opt/rocm
HIP_INCLUDE_DIR  := $(ROCM_INSTALL_DIR)/include

# Compiler driver; only set when not already provided by the user.
HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc

# Internal flag variables. The user-provided CXXFLAGS, CPPFLAGS, LDFLAGS and LDLIBS
# are appended to these further down.
CXX_STD   := c++17
ICXXFLAGS := -std=$(CXX_STD)
ICPPFLAGS := -I $(COMMON_INCLUDE_DIR)
ILDFLAGS  :=
ILDLIBS   :=

# Runtime-specific flags; any value other than CUDA or HIP is rejected.
ifeq ($(GPU_RUNTIME), CUDA)
  ICXXFLAGS += -x cu
  ICPPFLAGS += -isystem $(HIP_INCLUDE_DIR)
else ifeq ($(GPU_RUNTIME), HIP)
  CXXFLAGS ?= -Wall -Wextra
else
  $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be either CUDA or HIP)
endif

ICXXFLAGS += $(CXXFLAGS)
ICPPFLAGS += $(CPPFLAGS)
ILDFLAGS  += $(LDFLAGS)
ILDLIBS   += $(LDLIBS)

# Default target: build the example from its single source file.
$(EXAMPLE): main.hip $(COMMON_INCLUDE_DIR)/example_utils.hpp
	$(HIPCXX) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $< $(ILDLIBS)

.PHONY: clean

# Remove the built executable.
clean:
	$(RM) $(EXAMPLE)
@ -0,0 +1,45 @@
@@ -0,0 +1,45 @@
|
||||
# HIP-Basic Device Globals Example |
||||
|
||||
## Description |
||||
This program showcases a simple example that uses device global variables to perform a simple test kernel. Two such global variables are set using different methods: one is a single variable that is set by first obtaining a pointer to it and using `hipMemcpy`, as would be done for a pointer to device memory obtained from `hipMalloc`. The other is an array that is initialized without first explicitly obtaining the pointer by using `hipMemcpyToSymbol`.
||||
|
||||
### Application flow |
||||
1. A number of constants are defined for the kernel launch parameters. |
||||
2. The input and output vectors are initialized in host memory. |
||||
3. The necessary amount of device memory for the input and output vectors is allocated and the input data is copied to the device. |
||||
4. A pointer to the device global variable `global` is obtained via `hipGetSymbolAddress`. |
||||
5. The pointee is initialized by copying a value from the host to it. |
||||
6. The device global variable `global_array` is initialized by copying to it directly with `hipMemcpyToSymbol`. |
||||
7. The GPU kernel is then launched with the previously defined arguments. |
||||
8. The results are copied back to the host. |
||||
9. Device memory backing the input and output vectors is freed. |
||||
10. A reference computation is performed on the host and the results are compared with the expected result. The result of the comparison is printed to standard output. |
||||
## Key APIs and Concepts |
||||
Apart from via kernel parameters, values can also be passed to the device via _device global variables_: global variables that have the `__device__` attribute. These can be used from device kernels, and need to be initialized from the host before they hold a valid value. Device global variables are persistent between kernel launches, so they can also be used to communicate values between launches without explicitly managing a buffer for them on the host.
||||
|
||||
A device global variable cannot be used as a regular global variable from the host side. To manage one, a pointer to the device memory that it represents needs to be obtained first. This can be done using the function `hipGetSymbolAddress(dev_ptr, symbol)`; the size in bytes of the variable can be queried with `hipGetSymbolSize(size_ptr, symbol)`. A device global variable can be passed directly to these functions by using the `HIP_SYMBOL(symbol)` macro. The resulting device pointer can be used in the same ways as memory obtained from `hipMalloc`, and so the corresponding value can be set by using `hipMemcpy`.
||||
|
||||
Device global variables may also be initialized directly by using `hipMemcpyToSymbol(symbol, host_source, size_bytes, offset = 0, kind = hipMemcpyHostToDevice)`. This method omits having to fetch the pointer to the device global variable explicitly. Similarly, `hipMemcpyFromSymbol(host_dest, symbol, size_bytes, offset = 0, kind = hipMemcpyDeviceToHost)` can be used to copy from a device global variable back to the host.
||||
|
||||
## Demonstrated API Calls |
||||
### HIP runtime |
||||
#### Device symbols |
||||
- `__global__` |
||||
- `__device__` |
||||
- `threadIdx` |
||||
- `blockDim` |
||||
- `blockIdx` |
||||
|
||||
#### Host symbols |
||||
- `hipFree` |
||||
- `hipGetLastError` |
||||
- `hipGetSymbolAddress` |
||||
- `hipGetSymbolSize` |
||||
- `hipLaunchKernelGGL` |
||||
- `hipMalloc` |
||||
- `hipMemcpy` |
||||
- `hipMemcpyDeviceToHost` |
||||
- `hipMemcpyHostToDevice` |
||||
- `hipMemcpyToSymbol` |
||||
- `hipStreamDefault` |
||||
- `HIP_SYMBOL` |
@ -0,0 +1,99 @@
@@ -0,0 +1,99 @@
|
||||
<?xml version="1.0" encoding="utf-8"?> |
||||
<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> |
||||
<ItemGroup Label="ProjectConfigurations"> |
||||
<ProjectConfiguration Include="Debug|x64"> |
||||
<Configuration>Debug</Configuration> |
||||
<Platform>x64</Platform> |
||||
</ProjectConfiguration> |
||||
<ProjectConfiguration Include="Release|x64"> |
||||
<Configuration>Release</Configuration> |
||||
<Platform>x64</Platform> |
||||
</ProjectConfiguration> |
||||
</ItemGroup> |
||||
<ItemGroup> |
||||
<ClCompile Include="main.hip" /> |
||||
</ItemGroup> |
||||
<ItemGroup> |
||||
<ClInclude Include="..\..\Common\example_utils.hpp" /> |
||||
</ItemGroup> |
||||
<PropertyGroup Label="Globals"> |
||||
<VCProjectVersion>15.0</VCProjectVersion> |
||||
<ProjectGuid>{f7dd9451-b0ca-4c76-ab92-0e01cbebdbbe}</ProjectGuid> |
||||
<Keyword>Win32Proj</Keyword> |
||||
<RootNamespace>device_globals_vs2019</RootNamespace> |
||||
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion> |
||||
</PropertyGroup> |
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration"> |
||||
<ConfigurationType>Application</ConfigurationType> |
||||
<UseDebugLibraries>true</UseDebugLibraries> |
||||
<PlatformToolset>HIP</PlatformToolset> |
||||
<CharacterSet>Unicode</CharacterSet> |
||||
</PropertyGroup> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration"> |
||||
<ConfigurationType>Application</ConfigurationType> |
||||
<UseDebugLibraries>false</UseDebugLibraries> |
||||
<PlatformToolset>HIP</PlatformToolset> |
||||
<WholeProgramOptimization>true</WholeProgramOptimization> |
||||
<CharacterSet>Unicode</CharacterSet> |
||||
</PropertyGroup> |
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> |
||||
<ImportGroup Label="ExtensionSettings"> |
||||
<Import Condition="'$(HIPPropertiesImported)' != 'true'" Project="$(VCTargetsPath)\AMD.HIP.Common.props" /> |
||||
</ImportGroup> |
||||
<ImportGroup Label="Shared"> |
||||
</ImportGroup> |
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> |
||||
</ImportGroup> |
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> |
||||
</ImportGroup> |
||||
<PropertyGroup Label="UserMacros" /> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<LinkIncremental>true</LinkIncremental> |
||||
<TargetName>hip_$(ProjectName)</TargetName> |
||||
</PropertyGroup> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<LinkIncremental>false</LinkIncremental> |
||||
<TargetName>hip_$(ProjectName)</TargetName> |
||||
</PropertyGroup> |
||||
<PropertyGroup Label="HIP" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<TargetGPUArchitectures>gfx1030</TargetGPUArchitectures> |
||||
</PropertyGroup> |
||||
<PropertyGroup Label="HIP" Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<TargetGPUArchitectures>gfx1030</TargetGPUArchitectures> |
||||
</PropertyGroup> |
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<ClCompile> |
||||
<WarningLevel>Level2</WarningLevel> |
||||
<PreprocessorDefinitions>__HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions> |
||||
<LanguageStandard>stdcpp17</LanguageStandard> |
||||
<AdditionalIncludeDirectories>$(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> |
||||
</ClCompile> |
||||
<Link> |
||||
<SubSystem>Console</SubSystem> |
||||
<GenerateDebugInformation>true</GenerateDebugInformation> |
||||
</Link> |
||||
</ItemDefinitionGroup> |
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<ClCompile> |
||||
<WarningLevel>Level2</WarningLevel> |
||||
<FunctionLevelLinking>true</FunctionLevelLinking> |
||||
<IntrinsicFunctions>true</IntrinsicFunctions> |
||||
<PreprocessorDefinitions>__HIP_ROCclr__;__clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions> |
||||
<LanguageStandard>stdcpp17</LanguageStandard> |
||||
<AdditionalIncludeDirectories>$(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> |
||||
</ClCompile> |
||||
<Link> |
||||
<SubSystem>Console</SubSystem> |
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding> |
||||
<OptimizeReferences>true</OptimizeReferences> |
||||
<GenerateDebugInformation>true</GenerateDebugInformation> |
||||
</Link> |
||||
</ItemDefinitionGroup> |
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> |
||||
<ImportGroup Label="ExtensionTargets"> |
||||
<Import Condition="'$(HIPTargetsImported)' != 'true'" Project="$(VCTargetsPath)\AMD.HIP.Common.targets" /> |
||||
</ImportGroup> |
||||
</Project> |
@ -0,0 +1,164 @@
@@ -0,0 +1,164 @@
|
||||
// MIT License |
||||
// |
||||
// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. |
||||
// |
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
// of this software and associated documentation files (the "Software"), to deal |
||||
// in the Software without restriction, including without limitation the rights |
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||
// copies of the Software, and to permit persons to whom the Software is |
||||
// furnished to do so, subject to the following conditions: |
||||
// |
||||
// The above copyright notice and this permission notice shall be included in all |
||||
// copies or substantial portions of the Software. |
||||
// |
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
||||
// SOFTWARE. |
||||
|
||||
#include "example_utils.hpp"

#include <hip/hip_runtime.h>

#include <cassert>
#include <cmath>
#include <iostream>
#include <numeric>
#include <vector>
||||
|
||||
constexpr unsigned int device_array_size = 16; |
||||
|
||||
/// A test global variable of a single element, that will later be set from the host. |
||||
__device__ float global; |
||||
|
||||
/// A test global variable of \p device_array_size elements that will be set from the host. |
||||
__device__ float global_array[device_array_size]; |
||||
|
||||
/// \brief A simple test kernel, that reads from <tt>in</tt>, <tt>global</tt>, and |
||||
/// <tt>global_array</tt>. The result will be written to <tt>out</tt>. |
||||
__global__ void test_globals_kernel(float* out, const float* in, const size_t size) |
||||
{ |
||||
const unsigned int tid = blockDim.x * blockIdx.x + threadIdx.x; |
||||
if(tid < size) |
||||
{ |
||||
out[tid] = in[tid] + global + global_array[tid % device_array_size]; |
||||
} |
||||
} |
||||
|
||||
/// \brief Computes a reference result on the host, that is (if everything goes well)
/// hopefully equal to the results from the \p test_globals_kernel kernel.
/// \param in The input vector.
/// \param global_array Host copy of the values of the device array; indexed cyclically,
/// so it must not be empty when \p in is not empty.
/// \param global Host copy of the value of the single device global variable.
/// \returns A vector of the same size as \p in, where element <tt>i</tt> equals
/// <tt>in[i] + global + global_array[i % global_array.size()]</tt>.
std::vector<float> test_globals_reference(const std::vector<float>& in,
                                          const std::vector<float>& global_array,
                                          const float               global)
{
    // `global_array` is taken by reference: it is only read, so there is no need to
    // copy it on every call.
    std::vector<float> out(in.size());
    for(size_t i = 0; i < in.size(); ++i)
    {
        out[i] = in[i] + global + global_array[i % global_array.size()];
    }

    return out;
}
||||
|
||||
int main() |
||||
{ |
||||
// The size of the input and output vectors. |
||||
constexpr unsigned int size = 64; |
||||
|
||||
// The total number of bytes in the input and output vectors. |
||||
constexpr size_t size_bytes = size * sizeof(float); |
||||
|
||||
// Number of threads per kernel block. |
||||
constexpr unsigned int block_size = size; |
||||
|
||||
// Number of blocks per kernel grid. The expression below calculates ceil(size/block_size). |
||||
constexpr unsigned int grid_size = (size + block_size - 1) / block_size; |
||||
|
||||
// Allocate host vectors for the input and output. |
||||
std::vector<float> h_in(size); |
||||
std::vector<float> h_out(size); |
||||
|
||||
// Fill the input with an increasing sequence (i.e. 1, 2, 3, 4...). |
||||
std::iota(h_in.begin(), h_in.end(), 1.f); |
||||
|
||||
// Allocate and copy vectors to device memory. |
||||
float* d_in{}; |
||||
float* d_out{}; |
||||
HIP_CHECK(hipMalloc(&d_in, size_bytes)); |
||||
HIP_CHECK(hipMalloc(&d_out, size_bytes)); |
||||
HIP_CHECK(hipMemcpy(d_in, h_in.data(), size_bytes, hipMemcpyHostToDevice)); |
||||
|
||||
// Fetch a device pointer to the device variable "global". We can pass the relevant |
||||
// symbol directly to this function. |
||||
void* d_global{}; |
||||
size_t global_size_bytes{}; |
||||
HIP_CHECK(hipGetSymbolAddress(&d_global, HIP_SYMBOL(global))); |
||||
HIP_CHECK(hipGetSymbolSize(&global_size_bytes, HIP_SYMBOL(global))); |
||||
assert(global_size_bytes == sizeof(float)); |
||||
|
||||
// This pointer is a regular device pointer, and so we may use it in the same ways |
||||
// as pointers allocated using `hipMalloc`. |
||||
constexpr float h_global = 42.f; |
||||
HIP_CHECK(hipMemcpy(d_global, &h_global, global_size_bytes, hipMemcpyHostToDevice)); |
||||
|
||||
// Set up the inputs for `global_array`. |
||||
std::vector<float> h_global_array(device_array_size); |
||||
for(size_t i = 0; i < h_global_array.size(); ++i) |
||||
{ |
||||
h_global_array[i] = i * 1000.f; |
||||
} |
||||
|
||||
// Initialize `global_array` by copying to it directly, omitting the need to fetch it first. |
||||
HIP_CHECK(hipMemcpyToSymbol(HIP_SYMBOL(global_array), |
||||
h_global_array.data(), |
||||
h_global_array.size() * sizeof(float))); |
||||
|
||||
// Launch the kernel on the default stream and with the above configuration. |
||||
hipLaunchKernelGGL(test_globals_kernel, |
||||
dim3(block_size), |
||||
dim3(grid_size), |
||||
0, |
||||
hipStreamDefault, |
||||
d_out, |
||||
d_in, |
||||
size); |
||||
|
||||
// Check if the kernel launch was successful. |
||||
HIP_CHECK(hipGetLastError()); |
||||
|
||||
// Copy the results back to the host. This call blocks the host's execution until the copy is finished. |
||||
HIP_CHECK(hipMemcpy(h_out.data(), d_out, size_bytes, hipMemcpyDeviceToHost)); |
||||
|
||||
// Free device memory. |
||||
HIP_CHECK(hipFree(d_in)); |
||||
HIP_CHECK(hipFree(d_out)); |
||||
|
||||
// Compute the expected values on the host. |
||||
const std::vector<float> reference = test_globals_reference(h_in, h_global_array, h_global); |
||||
|
||||
// Check the results' validity. |
||||
constexpr float eps = 1.0E-6; |
||||
unsigned int errors{}; |
||||
for(size_t i = 0; i < size; ++i) |
||||
{ |
||||
if(std::fabs(h_out[i] - reference[i]) > eps) |
||||
{ |
||||
++errors; |
||||
} |
||||
} |
||||
|
||||
if(errors != 0) |
||||
{ |
||||
std::cout << "Validation failed. Errors: " << errors << std::endl; |
||||
return error_exit_code; |
||||
} |
||||
else |
||||
{ |
||||
std::cout << "Validation passed." << std::endl; |
||||
} |
||||
|
||||
return 0; |
||||
} |
@ -0,0 +1,57 @@
@@ -0,0 +1,57 @@
|
||||
# MIT License |
||||
# |
||||
# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. |
||||
# |
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
# of this software and associated documentation files (the "Software"), to deal |
||||
# in the Software without restriction, including without limitation the rights |
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||
# copies of the Software, and to permit persons to whom the Software is |
||||
# furnished to do so, subject to the following conditions: |
||||
# |
||||
# The above copyright notice and this permission notice shall be included in all |
||||
# copies or substantial portions of the Software. |
||||
# |
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
||||
# SOFTWARE. |
||||
|
||||
# Build script for the HIP-Basic GPU architecture-specific code example.
# The example can be compiled either with the HIP or with the CUDA language support of
# CMake; the choice is made through the GPU_RUNTIME cache variable.
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)

set(example_name hip_gpu_arch)
project(${example_name} LANGUAGES CXX)

set(GPU_RUNTIME "HIP" CACHE STRING "Switches between HIP and CUDA")
set(GPU_RUNTIMES "HIP" "CUDA")
set_property(CACHE GPU_RUNTIME PROPERTY STRINGS ${GPU_RUNTIMES})

if(NOT "${GPU_RUNTIME}" IN_LIST GPU_RUNTIMES)
    set(ERROR_MESSAGE "GPU_RUNTIME is set to \"${GPU_RUNTIME}\".\nGPU_RUNTIME must be either HIP or CUDA.")
    message(FATAL_ERROR "${ERROR_MESSAGE}")
endif()

# Enable the selected GPU language and require C++17 without compiler extensions.
enable_language(${GPU_RUNTIME})
set(CMAKE_${GPU_RUNTIME}_STANDARD 17)
set(CMAKE_${GPU_RUNTIME}_EXTENSIONS OFF)
set(CMAKE_${GPU_RUNTIME}_STANDARD_REQUIRED ON)

# Only fall back to the default ROCm location when the user did not provide a prefix path.
set(ROCM_ROOT "/opt/rocm" CACHE PATH "Root directory of the ROCm installation")
if(NOT CMAKE_PREFIX_PATH)
    set(CMAKE_PREFIX_PATH "${ROCM_ROOT}")
endif()

add_executable(${example_name} main.hip)

# Make example runnable using ctest. The NAME/COMMAND signature resolves the target name
# to the location of the built executable, which the legacy signature does not do.
add_test(NAME ${example_name} COMMAND ${example_name})

# The shared example headers are always needed; the HIP headers from the ROCm
# installation are added explicitly when compiling with the CUDA language.
set(include_dirs "${CMAKE_CURRENT_SOURCE_DIR}/../../Common")
if("${GPU_RUNTIME}" STREQUAL "CUDA")
    list(APPEND include_dirs "${ROCM_ROOT}/include")
endif()

target_include_directories(${example_name} PRIVATE ${include_dirs})

# Compile the .hip source with the selected GPU language.
set_source_files_properties(main.hip PROPERTIES LANGUAGE ${GPU_RUNTIME})
@ -0,0 +1,60 @@
@@ -0,0 +1,60 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
# Name of the executable, location of the shared example headers, and the GPU runtime
# the example is built for.
EXAMPLE            := hip_gpu_arch
COMMON_INCLUDE_DIR := ../../Common
GPU_RUNTIME        := HIP

# ROCm installation and the HIP headers inside it.
ROCM_INSTALL_DIR := /opt/rocm
HIP_INCLUDE_DIR  := $(ROCM_INSTALL_DIR)/include

# Compiler driver; only set when not already provided by the user.
HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc

# Internal flag variables. The user-provided CXXFLAGS, CPPFLAGS, LDFLAGS and LDLIBS
# are appended to these further down.
CXX_STD   := c++17
ICXXFLAGS := -std=$(CXX_STD)
ICPPFLAGS := -I $(COMMON_INCLUDE_DIR)
ILDFLAGS  :=
ILDLIBS   :=

# Runtime-specific flags; any value other than CUDA or HIP is rejected.
ifeq ($(GPU_RUNTIME), CUDA)
  ICXXFLAGS += -x cu
  ICPPFLAGS += -isystem $(HIP_INCLUDE_DIR)
else ifeq ($(GPU_RUNTIME), HIP)
  CXXFLAGS ?= -Wall -Wextra
else
  $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be either CUDA or HIP)
endif

ICXXFLAGS += $(CXXFLAGS)
ICPPFLAGS += $(CPPFLAGS)
ILDFLAGS  += $(LDFLAGS)
ILDLIBS   += $(LDLIBS)

# Default target: build the example from its single source file.
$(EXAMPLE): main.hip $(COMMON_INCLUDE_DIR)/example_utils.hpp
	$(HIPCXX) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $< $(ILDLIBS)

.PHONY: clean

# Remove the built executable.
clean:
	$(RM) $(EXAMPLE)
@ -0,0 +1,33 @@
@@ -0,0 +1,33 @@
|
||||
# HIP-Basic GPU Architecture-specific Code Example |
||||
|
||||
## Description |
||||
This program showcases an implementation of a simple matrix transpose kernel, which uses a different codepath depending on the target architecture. |
||||
|
||||
### Application flow |
||||
1. A number of constants are defined to control the problem details and the kernel launch parameters. |
||||
2. Input matrix is set up in host memory. |
||||
3. The necessary amount of device memory is allocated and input is copied to the device. |
||||
4. The GPU transposition kernel is launched with previously defined arguments. |
||||
5. The kernel will have two different codepaths for its data movement, depending on the target architecture. |
||||
6. The transposed matrix is copied back to the host and all device memory is freed. |
||||
7. The elements of the result matrix are compared with the expected result. The result of the comparison is printed to the standard output. |
||||
|
||||
## Key APIs and Concepts |
||||
This example showcases two different codepaths inside a GPU kernel, depending on the target architecture. |
||||
|
||||
You may want to use architecture-specific inline assembly when compiling for a specific architecture, without losing compatibility with other architectures (see the [inline_assembly](/HIP-Basic/inline_assembly/main.hip) example). |
||||
|
||||
These architecture-specific compiler definitions only exist within GPU kernels. If you would like to have GPU architecture-specific host-side code, you could query the stream/device information at runtime. |
||||
|
||||
## Demonstrated API Calls |
||||
### HIP runtime |
||||
#### Device symbols |
||||
- `threadIdx`, `blockIdx`, `blockDim` |
||||
- `__gfx1010__`, `__gfx1011__`, `__gfx1012__`, `__gfx1030__`, `__gfx1031__`, `__gfx1100__`, `__gfx1101__`, `__gfx1102__` |
||||
#### Host symbols |
||||
- `hipMalloc` |
||||
- `hipMemcpy` |
||||
- `hipLaunchKernelGGL` |
||||
- `HIP_KERNEL_NAME` |
||||
- `hipGetLastError` |
||||
- `hipFree` |
@ -0,0 +1,99 @@
@@ -0,0 +1,99 @@
|
||||
<?xml version="1.0" encoding="utf-8"?> |
||||
<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> |
||||
<ItemGroup Label="ProjectConfigurations"> |
||||
<ProjectConfiguration Include="Debug|x64"> |
||||
<Configuration>Debug</Configuration> |
||||
<Platform>x64</Platform> |
||||
</ProjectConfiguration> |
||||
<ProjectConfiguration Include="Release|x64"> |
||||
<Configuration>Release</Configuration> |
||||
<Platform>x64</Platform> |
||||
</ProjectConfiguration> |
||||
</ItemGroup> |
||||
<ItemGroup> |
||||
<ClCompile Include="main.hip" /> |
||||
</ItemGroup> |
||||
<ItemGroup> |
||||
<ClInclude Include="..\..\Common\example_utils.hpp" /> |
||||
</ItemGroup> |
||||
<PropertyGroup Label="Globals"> |
||||
<VCProjectVersion>15.0</VCProjectVersion> |
||||
<ProjectGuid>{4e6b2034-d7ed-4cb4-98b2-7b2d2b71e0a8}</ProjectGuid> |
||||
<Keyword>Win32Proj</Keyword> |
||||
<RootNamespace>gpu_arch_vs2019</RootNamespace> |
||||
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion> |
||||
</PropertyGroup> |
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration"> |
||||
<ConfigurationType>Application</ConfigurationType> |
||||
<UseDebugLibraries>true</UseDebugLibraries> |
||||
<PlatformToolset>HIP</PlatformToolset> |
||||
<CharacterSet>Unicode</CharacterSet> |
||||
</PropertyGroup> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration"> |
||||
<ConfigurationType>Application</ConfigurationType> |
||||
<UseDebugLibraries>false</UseDebugLibraries> |
||||
<PlatformToolset>HIP</PlatformToolset> |
||||
<WholeProgramOptimization>true</WholeProgramOptimization> |
||||
<CharacterSet>Unicode</CharacterSet> |
||||
</PropertyGroup> |
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> |
||||
<ImportGroup Label="ExtensionSettings"> |
||||
<Import Condition="'$(HIPPropertiesImported)' != 'true'" Project="$(VCTargetsPath)\AMD.HIP.Common.props" /> |
||||
</ImportGroup> |
||||
<ImportGroup Label="Shared"> |
||||
</ImportGroup> |
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> |
||||
</ImportGroup> |
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> |
||||
</ImportGroup> |
||||
<PropertyGroup Label="UserMacros" /> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<LinkIncremental>true</LinkIncremental> |
||||
<TargetName>hip_$(ProjectName)</TargetName> |
||||
</PropertyGroup> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<LinkIncremental>false</LinkIncremental> |
||||
<TargetName>hip_$(ProjectName)</TargetName> |
||||
</PropertyGroup> |
||||
<PropertyGroup Label="HIP" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<TargetGPUArchitectures>gfx1030</TargetGPUArchitectures> |
||||
</PropertyGroup> |
||||
<PropertyGroup Label="HIP" Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<TargetGPUArchitectures>gfx1030</TargetGPUArchitectures> |
||||
</PropertyGroup> |
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<ClCompile> |
||||
<WarningLevel>Level2</WarningLevel> |
||||
<PreprocessorDefinitions>__HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions> |
||||
<LanguageStandard>stdcpp17</LanguageStandard> |
||||
<AdditionalIncludeDirectories>$(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> |
||||
</ClCompile> |
||||
<Link> |
||||
<SubSystem>Console</SubSystem> |
||||
<GenerateDebugInformation>true</GenerateDebugInformation> |
||||
</Link> |
||||
</ItemDefinitionGroup> |
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<ClCompile> |
||||
<WarningLevel>Level2</WarningLevel> |
||||
<FunctionLevelLinking>true</FunctionLevelLinking> |
||||
<IntrinsicFunctions>true</IntrinsicFunctions> |
||||
<PreprocessorDefinitions>__HIP_ROCclr__;__clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions> |
||||
<LanguageStandard>stdcpp17</LanguageStandard> |
||||
<AdditionalIncludeDirectories>$(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> |
||||
</ClCompile> |
||||
<Link> |
||||
<SubSystem>Console</SubSystem> |
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding> |
||||
<OptimizeReferences>true</OptimizeReferences> |
||||
<GenerateDebugInformation>true</GenerateDebugInformation> |
||||
</Link> |
||||
</ItemDefinitionGroup> |
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> |
||||
<ImportGroup Label="ExtensionTargets"> |
||||
<Import Condition="'$(HIPTargetsImported)' != 'true'" Project="$(VCTargetsPath)\AMD.HIP.Common.targets" /> |
||||
</ImportGroup> |
||||
</Project> |
@ -0,0 +1,150 @@
@@ -0,0 +1,150 @@
|
||||
// MIT License |
||||
// |
||||
// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. |
||||
// |
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
// of this software and associated documentation files (the "Software"), to deal |
||||
// in the Software without restriction, including without limitation the rights |
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||
// copies of the Software, and to permit persons to whom the Software is |
||||
// furnished to do so, subject to the following conditions: |
||||
// |
||||
// The above copyright notice and this permission notice shall be included in all |
||||
// copies or substantial portions of the Software. |
||||
// |
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
||||
// SOFTWARE. |
||||
|
||||
#include "example_utils.hpp" |
||||
|
||||
#include <hip/hip_runtime.h> |
||||
|
||||
#include <iostream> |
||||
#include <vector> |
||||
|
||||
#include <cstddef> |
||||
#include <cstdlib> |
||||
|
||||
/// \brief A simple matrix transpose kernel that's using inline assembly. |
||||
/// - The number of rows in the input and output matrices is equal, and given by the \p width parameter. |
||||
/// - Each thread in the grid is responsible for one element of the input and output matrices. |
||||
__global__ void matrix_transpose_kernel(float* out, const float* in, const unsigned int width) |
||||
{ |
||||
int x = blockDim.x * blockIdx.x + threadIdx.x; |
||||
int y = blockDim.y * blockIdx.y + threadIdx.y; |
||||
|
||||
#if(__gfx1010__ || __gfx1011__ || __gfx1012__ || __gfx1030__ || __gfx1031__ || __gfx1100__ \ |
||||
|| __gfx1101__ || __gfx1102__) |
||||
// Codepath for one of the architectures listed above |
||||
out[y * width + x] = in[x * width + y]; |
||||
#else |
||||
// Codepath if we're not on one of those architectures |
||||
// Note: to check if we're on AMD or NVIDIA hardware, you could use the more generic: |
||||
// __HIP_PLATFORM_AMD__ and __HIP_PLATFORM_NVIDIA__ |
||||
out[x * width + y] = in[y * width + x]; |
||||
#endif |
||||
} |
||||
|
||||
/// \brief CPU reference implementation of the transpose of a square matrix.
/// \param input Matrix elements; must hold at least \p width * \p width values.
/// \param width Number of rows (and columns) of the matrix.
/// \return A new vector of \p width * \p width elements in which
/// output[i * width + j] == input[j * width + i].
std::vector<float> matrix_transpose_reference(const std::vector<float>& input,
                                              const unsigned int        width)
{
    // Do all size and index arithmetic in std::size_t: computed in unsigned int,
    // width * width wraps around for large matrices.
    const std::size_t n = width;

    std::vector<float> output(n * n);
    for(std::size_t j = 0; j < n; j++)
    {
        for(std::size_t i = 0; i < n; i++)
        {
            output[i * n + j] = input[j * n + i];
        }
    }
    return output;
}
||||
|
||||
int main() |
||||
{ |
||||
// Number of rows and columns in the transposed square matrix. |
||||
constexpr unsigned int width = 1024; |
||||
|
||||
// Number of threads in each kernel block along the X dimension. |
||||
constexpr unsigned int threads_per_block_x = 8; |
||||
|
||||
// Number of threads in each kernel block along the Y dimension. |
||||
constexpr unsigned int threads_per_block_y = 8; |
||||
|
||||
// Total element count of the transposed matrix. |
||||
constexpr unsigned int size = width * width; |
||||
|
||||
// Total size (in bytes) of the transposed matrix. |
||||
constexpr size_t size_bytes = sizeof(float) * size; |
||||
|
||||
// Allocate host vectors. |
||||
std::vector<float> h_matrix(size); |
||||
std::vector<float> h_transposed_matrix(size); |
||||
|
||||
// Set up input data. |
||||
for(unsigned int i = 0; i < size; i++) |
||||
{ |
||||
h_matrix[i] = i * 10.0f; |
||||
} |
||||
|
||||
// Allocate device memory for the input and output matrices. |
||||
float* d_matrix{}; |
||||
float* d_transposed_matrix{}; |
||||
HIP_CHECK(hipMalloc(&d_matrix, size_bytes)); |
||||
HIP_CHECK(hipMalloc(&d_transposed_matrix, size_bytes)); |
||||
|
||||
// Transfer the input matrix to the device memory. |
||||
HIP_CHECK(hipMemcpy(d_matrix, h_matrix.data(), size_bytes, hipMemcpyHostToDevice)); |
||||
|
||||
// Lauching kernel from host. |
||||
hipLaunchKernelGGL(HIP_KERNEL_NAME(matrix_transpose_kernel), |
||||
dim3(width / threads_per_block_x, width / threads_per_block_y), |
||||
dim3(threads_per_block_x, threads_per_block_y), |
||||
0, |
||||
hipStreamDefault, |
||||
d_transposed_matrix, |
||||
d_matrix, |
||||
width); |
||||
|
||||
// Check if the kernel launch was successful. |
||||
HIP_CHECK(hipGetLastError()); |
||||
|
||||
// Transfer the result back to the host. |
||||
HIP_CHECK(hipMemcpy(h_transposed_matrix.data(), |
||||
d_transposed_matrix, |
||||
size_bytes, |
||||
hipMemcpyDeviceToHost)); |
||||
|
||||
// Free the resources on the device. |
||||
HIP_CHECK(hipFree(d_matrix)); |
||||
HIP_CHECK(hipFree(d_transposed_matrix)); |
||||
|
||||
// Perform the reference (CPU) calculation. |
||||
std::vector<float> ref_transposed_matrix = matrix_transpose_reference(h_matrix, width); |
||||
|
||||
// Check the results' validity. |
||||
constexpr float eps = 1.0E-6; |
||||
unsigned int errors{}; |
||||
for(unsigned int i = 0; i < size; i++) |
||||
{ |
||||
if(std::fabs(h_transposed_matrix[i] - ref_transposed_matrix[i]) > eps) |
||||
{ |
||||
errors++; |
||||
} |
||||
} |
||||
|
||||
if(errors != 0) |
||||
{ |
||||
std::cout << "Validation failed. Errors: " << errors << std::endl; |
||||
return error_exit_code; |
||||
} |
||||
else |
||||
{ |
||||
std::cout << "Validation passed." << std::endl; |
||||
} |
||||
} |
@ -0,0 +1 @@
@@ -0,0 +1 @@
|
||||
hip_inline_assembly |
@ -0,0 +1,57 @@
@@ -0,0 +1,57 @@
|
||||
# MIT License |
||||
# |
||||
# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. |
||||
# |
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
# of this software and associated documentation files (the "Software"), to deal |
||||
# in the Software without restriction, including without limitation the rights |
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||
# copies of the Software, and to permit persons to whom the Software is |
||||
# furnished to do so, subject to the following conditions: |
||||
# |
||||
# The above copyright notice and this permission notice shall be included in all |
||||
# copies or substantial portions of the Software. |
||||
# |
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
||||
# SOFTWARE. |
||||
|
||||
# Standalone build script for the hip_inline_assembly example.
# The policy baseline is set before any other command.
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)

set(example_name hip_inline_assembly)
project(${example_name} LANGUAGES CXX)

# GPU_RUNTIME selects the language that main.hip is compiled as (HIP or CUDA).
set(GPU_RUNTIME "HIP" CACHE STRING "Switches between HIP and CUDA")
set(GPU_RUNTIMES "HIP" "CUDA")
set_property(CACHE GPU_RUNTIME PROPERTY STRINGS ${GPU_RUNTIMES})

if(NOT "${GPU_RUNTIME}" IN_LIST GPU_RUNTIMES)
    set(ERROR_MESSAGE "GPU_RUNTIME is set to \"${GPU_RUNTIME}\".\nGPU_RUNTIME must be either HIP or CUDA.")
    message(FATAL_ERROR "${ERROR_MESSAGE}")
endif()

enable_language(${GPU_RUNTIME})
set(CMAKE_${GPU_RUNTIME}_STANDARD 17)
set(CMAKE_${GPU_RUNTIME}_EXTENSIONS OFF)
set(CMAKE_${GPU_RUNTIME}_STANDARD_REQUIRED ON)

# Only point CMake at the ROCm installation when the user did not provide a prefix path.
set(ROCM_ROOT "/opt/rocm" CACHE PATH "Root directory of the ROCm installation")
if(NOT CMAKE_PREFIX_PATH)
    set(CMAKE_PREFIX_PATH "${ROCM_ROOT}")
endif()

# add_test() only registers tests after enable_testing() has been called, so call it here
# to keep the example testable when this file is configured on its own.
enable_testing()

add_executable(${example_name} main.hip)
# Make example runnable using ctest.
# The NAME/COMMAND signature resolves the target name to the built executable.
add_test(NAME ${example_name} COMMAND ${example_name})

# Anchor the shared header directory to this file's location.
set(include_dirs "${CMAKE_CURRENT_SOURCE_DIR}/../../Common")
if("${GPU_RUNTIME}" STREQUAL "CUDA")
    # When compiling as CUDA, the ROCm include directory is added explicitly.
    list(APPEND include_dirs "${ROCM_ROOT}/include")
endif()

target_include_directories(${example_name} PRIVATE ${include_dirs})
set_source_files_properties(main.hip PROPERTIES LANGUAGE ${GPU_RUNTIME})
@ -0,0 +1,60 @@
@@ -0,0 +1,60 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
# Name of the produced executable.
EXAMPLE := hip_inline_assembly
# Directory holding the headers shared between the examples.
COMMON_INCLUDE_DIR := ../../Common
# Runtime to build for: HIP or CUDA.
GPU_RUNTIME := HIP

# HIP variables
ROCM_INSTALL_DIR := /opt/rocm
HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include

# Compiler driver; may be overridden from the environment or the command line.
HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc

# Common variables and flags
# The I-prefixed variables are internal; the user-supplied CXXFLAGS, CPPFLAGS,
# LDFLAGS and LDLIBS are appended to them further down.
CXX_STD := c++17
ICXXFLAGS := -std=$(CXX_STD)
ICPPFLAGS := -I $(COMMON_INCLUDE_DIR)
ILDFLAGS :=
ILDLIBS :=

# Runtime-specific flags; any value other than CUDA or HIP is rejected.
ifeq ($(GPU_RUNTIME), CUDA)
	ICXXFLAGS += -x cu
	ICPPFLAGS += -isystem $(HIP_INCLUDE_DIR)
else ifeq ($(GPU_RUNTIME), HIP)
	CXXFLAGS ?= -Wall -Wextra
else
	$(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be either CUDA or HIP)
endif

# Append the user-supplied flags after the internal ones.
ICXXFLAGS += $(CXXFLAGS)
ICPPFLAGS += $(CPPFLAGS)
ILDFLAGS += $(LDFLAGS)
ILDLIBS += $(LDLIBS)

# Build the example from its single source file; it is rebuilt when the shared header changes.
$(EXAMPLE): main.hip $(COMMON_INCLUDE_DIR)/example_utils.hpp
	$(HIPCXX) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $< $(ILDLIBS)

# Remove the built executable.
clean:
	$(RM) $(EXAMPLE)

.PHONY: clean
@ -0,0 +1,48 @@
@@ -0,0 +1,48 @@
|
||||
# HIP-Basic Inline Assembly Example |
||||
|
||||
## Description |
||||
This program showcases an implementation of a simple matrix transpose kernel, which uses inline assembly and works on both AMD and NVIDIA hardware. |
||||
|
||||
By using inline assembly in your kernels, you may be able to gain extra performance. |
||||
It could also enable you to use special GPU hardware features which are not available through compiler intrinsics. |
||||
|
||||
|
||||
For more insights, please read the following blogs by Ben Sander: |
||||
[The Art of AMDGCN Assembly: How to Bend the Machine to Your Will](https://gpuopen.com/learn/amdgcn-assembly/) & |
||||
[AMD GCN Assembly: Cross-Lane Operations](https://gpuopen.com/learn/amd-gcn-assembly-cross-lane-operations/) |
||||
|
||||
For more information: |
||||
[AMD ISA documentation for current architectures](https://gpuopen.com/amd-isa-documentation/) & |
||||
[User Guide for LLVM AMDGPU Back-end](https://llvm.org/docs/AMDGPUUsage.html) |
||||
|
||||
|
||||
### Application flow |
||||
1. A number of variables are defined to control the problem details and the kernel launch parameters. |
||||
2. Input matrix is set up in host memory. |
||||
3. The necessary amount of device memory is allocated and input is copied to the device. |
||||
4. The GPU transposition kernel is launched with previously defined arguments. |
||||
5. The kernel will use different inline assembly for its data movement, depending on the target platform. |
||||
6. The transposed matrix is copied back to the host and all device memory is freed. |
||||
7. The elements of the result matrix are compared with the expected result. The result of the comparison is printed to the standard output. |
||||
|
||||
## Key APIs and Concepts |
||||
Using inline assembly in GPU kernels is somewhat similar to using inline assembly in host-side code. The `volatile` qualifier tells the compiler not to remove the assembly statement during optimization. |
||||
|
||||
```c++ |
||||
asm volatile("v_mov_b32_e32 %0, %1" : "=v"(variable_0) : "v"(variable_1)); |
||||
``` |
||||
|
||||
However, since the instruction set differs between GPU architectures, you usually want to use the appropriate GPU architecture compiler defines to support multiple architectures (see the [gpu_arch](/HIP-Basic/gpu_arch/main.hip) example for more fine-grained architecture control). |
||||
|
||||
## Demonstrated API Calls |
||||
### HIP runtime |
||||
#### Device symbols |
||||
- `threadIdx`, `blockIdx`, `blockDim` |
||||
- `__HIP_PLATFORM_AMD__`, `__HIP_PLATFORM_NVIDIA__` |
||||
#### Host symbols |
||||
- `hipMalloc` |
||||
- `hipMemcpy` |
||||
- `hipLaunchKernelGGL` |
||||
- `HIP_KERNEL_NAME` |
||||
- `hipGetLastError` |
||||
- `hipFree` |
@ -0,0 +1,99 @@
@@ -0,0 +1,99 @@
|
||||
<?xml version="1.0" encoding="utf-8"?> |
||||
<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> |
||||
<ItemGroup Label="ProjectConfigurations"> |
||||
<ProjectConfiguration Include="Debug|x64"> |
||||
<Configuration>Debug</Configuration> |
||||
<Platform>x64</Platform> |
||||
</ProjectConfiguration> |
||||
<ProjectConfiguration Include="Release|x64"> |
||||
<Configuration>Release</Configuration> |
||||
<Platform>x64</Platform> |
||||
</ProjectConfiguration> |
||||
</ItemGroup> |
||||
<ItemGroup> |
||||
<ClCompile Include="main.hip" /> |
||||
</ItemGroup> |
||||
<ItemGroup> |
||||
<ClInclude Include="..\..\Common\example_utils.hpp" /> |
||||
</ItemGroup> |
||||
<PropertyGroup Label="Globals"> |
||||
<VCProjectVersion>15.0</VCProjectVersion> |
||||
<ProjectGuid>{4e6b2034-d7ed-4cb4-98b2-7b2d2b71e0a7}</ProjectGuid> |
||||
<Keyword>Win32Proj</Keyword> |
||||
<RootNamespace>inline_assembly_vs2019</RootNamespace> |
||||
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion> |
||||
</PropertyGroup> |
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration"> |
||||
<ConfigurationType>Application</ConfigurationType> |
||||
<UseDebugLibraries>true</UseDebugLibraries> |
||||
<PlatformToolset>HIP</PlatformToolset> |
||||
<CharacterSet>Unicode</CharacterSet> |
||||
</PropertyGroup> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration"> |
||||
<ConfigurationType>Application</ConfigurationType> |
||||
<UseDebugLibraries>false</UseDebugLibraries> |
||||
<PlatformToolset>HIP</PlatformToolset> |
||||
<WholeProgramOptimization>true</WholeProgramOptimization> |
||||
<CharacterSet>Unicode</CharacterSet> |
||||
</PropertyGroup> |
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> |
||||
<ImportGroup Label="ExtensionSettings"> |
||||
<Import Condition="'$(HIPPropertiesImported)' != 'true'" Project="$(VCTargetsPath)\AMD.HIP.Common.props" /> |
||||
</ImportGroup> |
||||
<ImportGroup Label="Shared"> |
||||
</ImportGroup> |
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> |
||||
</ImportGroup> |
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> |
||||
</ImportGroup> |
||||
<PropertyGroup Label="UserMacros" /> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<LinkIncremental>true</LinkIncremental> |
||||
<TargetName>hip_$(ProjectName)</TargetName> |
||||
</PropertyGroup> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<LinkIncremental>false</LinkIncremental> |
||||
<TargetName>hip_$(ProjectName)</TargetName> |
||||
</PropertyGroup> |
||||
<PropertyGroup Label="HIP" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<TargetGPUArchitectures>gfx1030</TargetGPUArchitectures> |
||||
</PropertyGroup> |
||||
<PropertyGroup Label="HIP" Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<TargetGPUArchitectures>gfx1030</TargetGPUArchitectures> |
||||
</PropertyGroup> |
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<ClCompile> |
||||
<WarningLevel>Level2</WarningLevel> |
||||
<PreprocessorDefinitions>__HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions> |
||||
<LanguageStandard>stdcpp17</LanguageStandard> |
||||
<AdditionalIncludeDirectories>$(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> |
||||
</ClCompile> |
||||
<Link> |
||||
<SubSystem>Console</SubSystem> |
||||
<GenerateDebugInformation>true</GenerateDebugInformation> |
||||
</Link> |
||||
</ItemDefinitionGroup> |
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<ClCompile> |
||||
<WarningLevel>Level2</WarningLevel> |
||||
<FunctionLevelLinking>true</FunctionLevelLinking> |
||||
<IntrinsicFunctions>true</IntrinsicFunctions> |
||||
<PreprocessorDefinitions>__HIP_ROCclr__;__clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions> |
||||
<LanguageStandard>stdcpp17</LanguageStandard> |
||||
<AdditionalIncludeDirectories>$(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> |
||||
</ClCompile> |
||||
<Link> |
||||
<SubSystem>Console</SubSystem> |
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding> |
||||
<OptimizeReferences>true</OptimizeReferences> |
||||
<GenerateDebugInformation>true</GenerateDebugInformation> |
||||
</Link> |
||||
</ItemDefinitionGroup> |
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> |
||||
<ImportGroup Label="ExtensionTargets"> |
||||
<Import Condition="'$(HIPTargetsImported)' != 'true'" Project="$(VCTargetsPath)\AMD.HIP.Common.targets" /> |
||||
</ImportGroup> |
||||
</Project> |
@ -0,0 +1,145 @@
@@ -0,0 +1,145 @@
|
||||
// MIT License |
||||
// |
||||
// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. |
||||
// |
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
// of this software and associated documentation files (the "Software"), to deal |
||||
// in the Software without restriction, including without limitation the rights |
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||
// copies of the Software, and to permit persons to whom the Software is |
||||
// furnished to do so, subject to the following conditions: |
||||
// |
||||
// The above copyright notice and this permission notice shall be included in all |
||||
// copies or substantial portions of the Software. |
||||
// |
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
||||
// SOFTWARE. |
||||
|
||||
#include "example_utils.hpp" |
||||
|
||||
#include <hip/hip_runtime.h> |
||||
|
||||
#include <iostream> |
||||
#include <vector> |
||||
|
||||
#include <cstddef> |
||||
#include <cstdlib> |
||||
|
||||
/// \brief A simple matrix transpose kernel that's using inline assembly. |
||||
/// - The number of rows in the input and output matrices is equal, and given by the \p width parameter. |
||||
/// - Each thread in the grid is responsible for one element of the input and output matrices. |
||||
__global__ void matrix_transpose_kernel(float* out, const float* in, const unsigned int width) |
||||
{ |
||||
int x = blockDim.x * blockIdx.x + threadIdx.x; |
||||
int y = blockDim.y * blockIdx.y + threadIdx.y; |
||||
|
||||
#ifdef __HIP_PLATFORM_AMD__ |
||||
asm volatile("v_mov_b32_e32 %0, %1" : "=v"(out[x * width + y]) : "v"(in[y * width + x])); |
||||
#elif defined(__HIP_PLATFORM_NVIDIA__) |
||||
asm volatile("mov.f32 %0, %1;" : "=f"(out[x * width + y]) : "f"(in[y * width + x])); |
||||
#endif |
||||
} |
||||
|
||||
/// \brief CPU reference implementation of the transpose of a square matrix.
/// \param input Matrix elements; must hold at least \p width * \p width values.
/// \param width Number of rows (and columns) of the matrix.
/// \return A new vector of \p width * \p width elements in which
/// output[i * width + j] == input[j * width + i].
std::vector<float> matrix_transpose_reference(const std::vector<float>& input,
                                              const unsigned int        width)
{
    // Do all size and index arithmetic in std::size_t: computed in unsigned int,
    // width * width wraps around for large matrices.
    const std::size_t n = width;

    std::vector<float> output(n * n);
    for(std::size_t j = 0; j < n; j++)
    {
        for(std::size_t i = 0; i < n; i++)
        {
            output[i * n + j] = input[j * n + i];
        }
    }
    return output;
}
||||
|
||||
int main() |
||||
{ |
||||
// Number of rows and columns in the transposed square matrix. |
||||
constexpr unsigned int width = 1024; |
||||
|
||||
// Number of threads in each kernel block along the X dimension. |
||||
constexpr unsigned int threads_per_block_x = 8; |
||||
|
||||
// Number of threads in each kernel block along the Y dimension. |
||||
constexpr unsigned int threads_per_block_y = 8; |
||||
|
||||
// Total element count of the transposed matrix. |
||||
constexpr unsigned int size = width * width; |
||||
|
||||
// Total size (in bytes) of the transposed matrix. |
||||
constexpr size_t size_bytes = sizeof(float) * size; |
||||
|
||||
// Allocate host vectors. |
||||
std::vector<float> h_matrix(size); |
||||
std::vector<float> h_transposed_matrix(size); |
||||
|
||||
// Set up input data. |
||||
for(unsigned int i = 0; i < size; i++) |
||||
{ |
||||
h_matrix[i] = i * 10.0f; |
||||
} |
||||
|
||||
// Allocate device memory for the input and output matrices. |
||||
float* d_matrix{}; |
||||
float* d_transposed_matrix{}; |
||||
HIP_CHECK(hipMalloc(&d_matrix, size_bytes)); |
||||
HIP_CHECK(hipMalloc(&d_transposed_matrix, size_bytes)); |
||||
|
||||
// Transfer the input matrix to the device memory. |
||||
HIP_CHECK(hipMemcpy(d_matrix, h_matrix.data(), size_bytes, hipMemcpyHostToDevice)); |
||||
|
||||
// Lauching kernel from host. |
||||
hipLaunchKernelGGL(HIP_KERNEL_NAME(matrix_transpose_kernel), |
||||
dim3(width / threads_per_block_x, width / threads_per_block_y), |
||||
dim3(threads_per_block_x, threads_per_block_y), |
||||
0, |
||||
hipStreamDefault, |
||||
d_transposed_matrix, |
||||
d_matrix, |
||||
width); |
||||
|
||||
// Check if the kernel launch was successful. |
||||
HIP_CHECK(hipGetLastError()); |
||||
|
||||
// Transfer the result back to the host. |
||||
HIP_CHECK(hipMemcpy(h_transposed_matrix.data(), |
||||
d_transposed_matrix, |
||||
size_bytes, |
||||
hipMemcpyDeviceToHost)); |
||||
|
||||
// Free the resources on the device. |
||||
HIP_CHECK(hipFree(d_matrix)); |
||||
HIP_CHECK(hipFree(d_transposed_matrix)); |
||||
|
||||
// Perform the reference (CPU) calculation. |
||||
std::vector<float> ref_transposed_matrix = matrix_transpose_reference(h_matrix, width); |
||||
|
||||
// Check the results' validity. |
||||
constexpr float eps = 1.0E-6; |
||||
unsigned int errors{}; |
||||
for(unsigned int i = 0; i < size; i++) |
||||
{ |
||||
if(std::fabs(h_transposed_matrix[i] - ref_transposed_matrix[i]) > eps) |
||||
{ |
||||
errors++; |
||||
} |
||||
} |
||||
|
||||
if(errors != 0) |
||||
{ |
||||
std::cout << "Validation failed. Errors: " << errors << std::endl; |
||||
return error_exit_code; |
||||
} |
||||
else |
||||
{ |
||||
std::cout << "Validation passed." << std::endl; |
||||
} |
||||
} |
@ -1,183 +1,183 @@
@@ -1,183 +1,183 @@
|
||||
<?xml version="1.0" encoding="utf-8"?> |
||||
<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> |
||||
<ItemGroup Label="ProjectConfigurations"> |
||||
<ProjectConfiguration Include="Debug|x64"> |
||||
<Configuration>Debug</Configuration> |
||||
<Platform>x64</Platform> |
||||
</ProjectConfiguration> |
||||
<ProjectConfiguration Include="Release|x64"> |
||||
<Configuration>Release</Configuration> |
||||
<Platform>x64</Platform> |
||||
</ProjectConfiguration> |
||||
</ItemGroup> |
||||
<ItemGroup> |
||||
<ClCompile Include="main.hip"> |
||||
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">--cuda-host-only</AdditionalOptions> |
||||
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">--cuda-host-only</AdditionalOptions> |
||||
</ClCompile> |
||||
</ItemGroup> |
||||
<ItemGroup> |
||||
<ClInclude Include="..\..\Common\example_utils.hpp" /> |
||||
</ItemGroup> |
||||
<ItemGroup> |
||||
<CustomBuild Include="hip_obj_gen_win.mcin"> |
||||
<FileType>Document</FileType> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">copy %(Identity) "$(IntDir)%(Identity)"</Command> |
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Copying %(Identity)</Message> |
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Identity)</Outputs> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">copy %(Identity) "$(IntDir)%(Identity)"</Command> |
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Copying %(Identity)</Message> |
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Identity)</Outputs> |
||||
</CustomBuild> |
||||
<CustomBuild Include="main_gfx1030.ll"> |
||||
<FileType>Document</FileType> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx1030</Command> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx1030</Command> |
||||
</CustomBuild> |
||||
<CustomBuild Include="main_gfx803.ll"> |
||||
<FileType>Document</FileType> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx803</Command> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx803</Command> |
||||
</CustomBuild> |
||||
<CustomBuild Include="main_gfx900.ll"> |
||||
<!-- Runs clang++ on the gfx900 LLVM IR file and writes $(IntDir)%(FileName).o. The closing quote must come after the complete executable name, as in the sibling CustomBuild items. --> |
||||
<FileType>Document</FileType> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx900</Command> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx900</Command> |
||||
</CustomBuild> |
||||
<CustomBuild Include="main_gfx906.ll"> |
||||
<FileType>Document</FileType> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx906</Command> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx906</Command> |
||||
</CustomBuild> |
||||
<CustomBuild Include="main_gfx908.ll"> |
||||
<FileType>Document</FileType> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx908</Command> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx908</Command> |
||||
</CustomBuild> |
||||
<CustomBuild Include="main_gfx90a.ll"> |
||||
<FileType>Document</FileType> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx90a </Command> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx90a </Command> |
||||
</CustomBuild> |
||||
</ItemGroup> |
||||
<PropertyGroup Label="Globals"> |
||||
<VCProjectVersion>15.0</VCProjectVersion> |
||||
<ProjectGuid>{dbb8dfe9-cb1b-473c-937c-2a8120e0d819}</ProjectGuid> |
||||
<Keyword>Win32Proj</Keyword> |
||||
<RootNamespace>llvm_ir_to_executable_vs2019</RootNamespace> |
||||
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion> |
||||
</PropertyGroup> |
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration"> |
||||
<ConfigurationType>Application</ConfigurationType> |
||||
<UseDebugLibraries>true</UseDebugLibraries> |
||||
<PlatformToolset>HIP</PlatformToolset> |
||||
<CharacterSet>Unicode</CharacterSet> |
||||
</PropertyGroup> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration"> |
||||
<ConfigurationType>Application</ConfigurationType> |
||||
<UseDebugLibraries>false</UseDebugLibraries> |
||||
<PlatformToolset>HIP</PlatformToolset> |
||||
<WholeProgramOptimization>true</WholeProgramOptimization> |
||||
<CharacterSet>Unicode</CharacterSet> |
||||
</PropertyGroup> |
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> |
||||
<ImportGroup Label="ExtensionSettings"> |
||||
<Import Condition="'$(HIPPropertiesImported)' != 'true'" Project="$(VCTargetsPath)\AMD.HIP.Common.props" /> |
||||
</ImportGroup> |
||||
<ImportGroup Label="Shared"> |
||||
</ImportGroup> |
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> |
||||
</ImportGroup> |
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> |
||||
</ImportGroup> |
||||
<PropertyGroup Label="UserMacros" /> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<LinkIncremental>true</LinkIncremental> |
||||
<TargetName>hip_$(ProjectName)</TargetName> |
||||
<CustomBuildBeforeTargets>ClCompile</CustomBuildBeforeTargets> |
||||
</PropertyGroup> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<LinkIncremental>false</LinkIncremental> |
||||
<TargetName>hip_$(ProjectName)</TargetName> |
||||
<CustomBuildBeforeTargets>ClCompile</CustomBuildBeforeTargets> |
||||
</PropertyGroup> |
||||
<PropertyGroup Label="HIP" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<TargetGPUArchitectures>gfx1030</TargetGPUArchitectures> |
||||
</PropertyGroup> |
||||
<PropertyGroup Label="HIP" Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<TargetGPUArchitectures>gfx1030</TargetGPUArchitectures> |
||||
</PropertyGroup> |
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<ClCompile> |
||||
<!-- Warning level kept identical to the Release configuration of this project. --> |
||||
<WarningLevel>Level2</WarningLevel> |
||||
<PreprocessorDefinitions>__HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions> |
||||
<LanguageStandard>stdcpp17</LanguageStandard> |
||||
<AdditionalIncludeDirectories>$(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> |
||||
</ClCompile> |
||||
<Link> |
||||
<SubSystem>Console</SubSystem> |
||||
<GenerateDebugInformation>true</GenerateDebugInformation> |
||||
<AdditionalDependencies>$(IntDir)main_device.obj;%(AdditionalDependencies)</AdditionalDependencies> |
||||
</Link> |
||||
<CustomBuild> |
||||
<Message>Compiling Device LLVM IR %(Identity)</Message> |
||||
<Command>"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa</Command> |
||||
<Outputs>$(IntDir)%(FileName).o</Outputs> |
||||
</CustomBuild> |
||||
<CustomBuildStep> |
||||
<Command>"$(ClangToolPath)clang-offload-bundler" -type=o -bundle-align=4096 -targets=host-x86_64-pc-windows-msvc,hipv4-amdgcn-amd-amdhsa--gfx803,hipv4-amdgcn-amd-amdhsa--gfx900,hipv4-amdgcn-amd-amdhsa--gfx906,hipv4-amdgcn-amd-amdhsa--gfx908,hipv4-amdgcn-amd-amdhsa--gfx90a,hipv4-amdgcn-amd-amdhsa--gfx1030 -input=nul "-input=$(IntDir)main_gfx803.o" "-input=$(IntDir)main_gfx900.o" "-input=$(IntDir)main_gfx906.o" "-input=$(IntDir)main_gfx908.o" "-input=$(IntDir)main_gfx90a.o" "-input=$(IntDir)main_gfx1030.o" "-output=$(IntDir)offload_bundle.hipfb" |
||||
cd $(IntDir) && "$(ClangToolPath)llvm-mc" -triple host-x86_64-pc-windows-msvc "hip_obj_gen_win.mcin" -o "main_device.obj" --filetype=obj</Command> |
||||
</CustomBuildStep> |
||||
<CustomBuildStep> |
||||
<Message>Generating Device Offload Object</Message> |
||||
</CustomBuildStep> |
||||
<CustomBuildStep> |
||||
<Outputs>$(IntDIr)main_device.obj</Outputs> |
||||
</CustomBuildStep> |
||||
<CustomBuildStep> |
||||
<!-- Inputs of the bundling step: the six per-architecture device objects plus the copy of hip_obj_gen_win.mcin that the CustomBuild item places in $(IntDir). --> |
||||
<Inputs>$(IntDir)main_gfx803.o;$(IntDir)main_gfx900.o;$(IntDir)main_gfx906.o;$(IntDir)main_gfx908.o;$(IntDir)main_gfx90a.o;$(IntDir)main_gfx1030.o;$(IntDir)hip_obj_gen_win.mcin;%(Inputs)</Inputs> |
||||
</CustomBuildStep> |
||||
</ItemDefinitionGroup> |
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<ClCompile> |
||||
<WarningLevel>Level2</WarningLevel> |
||||
<FunctionLevelLinking>true</FunctionLevelLinking> |
||||
<IntrinsicFunctions>true</IntrinsicFunctions> |
||||
<PreprocessorDefinitions>__HIP_ROCclr__;__clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions> |
||||
<LanguageStandard>stdcpp17</LanguageStandard> |
||||
<AdditionalIncludeDirectories>$(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> |
||||
</ClCompile> |
||||
<Link> |
||||
<SubSystem>Console</SubSystem> |
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding> |
||||
<OptimizeReferences>true</OptimizeReferences> |
||||
<GenerateDebugInformation>true</GenerateDebugInformation> |
||||
<AdditionalDependencies>$(IntDir)main_device.obj;%(AdditionalDependencies)</AdditionalDependencies> |
||||
</Link> |
||||
<CustomBuild> |
||||
<Message>Compiling Device LLVM IR %(Identity)</Message> |
||||
<Command>"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa</Command> |
||||
<Outputs>$(IntDir)%(FileName).o</Outputs> |
||||
</CustomBuild> |
||||
<CustomBuildStep> |
||||
<!-- First command: clang-offload-bundler packs the per-architecture device objects into offload_bundle.hipfb. Every device target is written with an empty environment field, i.e. two consecutive hyphens before the GPU name, matching the other entries in this list. Second command: llvm-mc assembles hip_obj_gen_win.mcin into main_device.obj inside $(IntDir). --> |
||||
<Command>"$(ClangToolPath)clang-offload-bundler" -type=o -bundle-align=4096 -targets=host-x86_64-pc-windows-msvc,hipv4-amdgcn-amd-amdhsa--gfx803,hipv4-amdgcn-amd-amdhsa--gfx900,hipv4-amdgcn-amd-amdhsa--gfx906,hipv4-amdgcn-amd-amdhsa--gfx908,hipv4-amdgcn-amd-amdhsa--gfx90a,hipv4-amdgcn-amd-amdhsa--gfx1030 -input=NUL "-input=$(IntDir)main_gfx803.o" "-input=$(IntDir)main_gfx900.o" "-input=$(IntDir)main_gfx906.o" "-input=$(IntDir)main_gfx908.o" "-input=$(IntDir)main_gfx90a.o" "-input=$(IntDir)main_gfx1030.o" "-output=$(IntDir)offload_bundle.hipfb" |
||||
cd $(IntDir) && "$(ClangToolPath)llvm-mc" -triple host-x86_64-pc-windows-msvc "hip_obj_gen_win.mcin" -o "main_device.obj" --filetype=obj</Command> |
||||
</CustomBuildStep> |
||||
<CustomBuildStep> |
||||
<Message>Generating Device Offload Object</Message> |
||||
</CustomBuildStep> |
||||
<CustomBuildStep> |
||||
<Outputs>$(IntDIr)main_device.obj</Outputs> |
||||
</CustomBuildStep> |
||||
<CustomBuildStep> |
||||
<!-- Inputs of the bundling step: the six per-architecture device objects plus the copy of hip_obj_gen_win.mcin that the CustomBuild item places in $(IntDir). --> |
||||
<Inputs>$(IntDir)main_gfx803.o;$(IntDir)main_gfx900.o;$(IntDir)main_gfx906.o;$(IntDir)main_gfx908.o;$(IntDir)main_gfx90a.o;$(IntDir)main_gfx1030.o;$(IntDir)hip_obj_gen_win.mcin;%(Inputs)</Inputs> |
||||
</CustomBuildStep> |
||||
</ItemDefinitionGroup> |
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> |
||||
<ImportGroup Label="ExtensionTargets"> |
||||
<Import Condition="'$(HIPTargetsImported)' != 'true'" Project="$(VCTargetsPath)\AMD.HIP.Common.targets" /> |
||||
</ImportGroup> |
||||
</Project> |
||||
<?xml version="1.0" encoding="utf-8"?> |
||||
<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> |
||||
<ItemGroup Label="ProjectConfigurations"> |
||||
<ProjectConfiguration Include="Debug|x64"> |
||||
<Configuration>Debug</Configuration> |
||||
<Platform>x64</Platform> |
||||
</ProjectConfiguration> |
||||
<ProjectConfiguration Include="Release|x64"> |
||||
<Configuration>Release</Configuration> |
||||
<Platform>x64</Platform> |
||||
</ProjectConfiguration> |
||||
</ItemGroup> |
||||
<ItemGroup> |
||||
<ClCompile Include="main.hip"> |
||||
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">--cuda-host-only</AdditionalOptions> |
||||
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">--cuda-host-only</AdditionalOptions> |
||||
</ClCompile> |
||||
</ItemGroup> |
||||
<ItemGroup> |
||||
<ClInclude Include="..\..\Common\example_utils.hpp" /> |
||||
</ItemGroup> |
||||
<ItemGroup> |
||||
<CustomBuild Include="hip_obj_gen_win.mcin"> |
||||
<FileType>Document</FileType> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">copy %(Identity) "$(IntDir)%(Identity)"</Command> |
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Copying %(Identity)</Message> |
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Identity)</Outputs> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">copy %(Identity) "$(IntDir)%(Identity)"</Command> |
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Copying %(Identity)</Message> |
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Identity)</Outputs> |
||||
</CustomBuild> |
||||
<CustomBuild Include="main_gfx1030.ll"> |
||||
<FileType>Document</FileType> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx1030</Command> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx1030</Command> |
||||
</CustomBuild> |
||||
<CustomBuild Include="main_gfx803.ll"> |
||||
<FileType>Document</FileType> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx803</Command> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx803</Command> |
||||
</CustomBuild> |
||||
<CustomBuild Include="main_gfx900.ll"> |
||||
<!-- Runs clang++ on the gfx900 LLVM IR file and writes $(IntDir)%(FileName).o. The closing quote must come after the complete executable name, as in the sibling CustomBuild items. --> |
||||
<FileType>Document</FileType> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx900</Command> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx900</Command> |
||||
</CustomBuild> |
||||
<CustomBuild Include="main_gfx906.ll"> |
||||
<FileType>Document</FileType> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx906</Command> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx906</Command> |
||||
</CustomBuild> |
||||
<CustomBuild Include="main_gfx908.ll"> |
||||
<FileType>Document</FileType> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx908</Command> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx908</Command> |
||||
</CustomBuild> |
||||
<CustomBuild Include="main_gfx90a.ll"> |
||||
<FileType>Document</FileType> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx90a </Command> |
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx90a </Command> |
||||
</CustomBuild> |
||||
</ItemGroup> |
||||
<PropertyGroup Label="Globals"> |
||||
<VCProjectVersion>15.0</VCProjectVersion> |
||||
<ProjectGuid>{dbb8dfe9-cb1b-473c-937c-2a8120e0d819}</ProjectGuid> |
||||
<Keyword>Win32Proj</Keyword> |
||||
<RootNamespace>llvm_ir_to_executable_vs2019</RootNamespace> |
||||
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion> |
||||
</PropertyGroup> |
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration"> |
||||
<ConfigurationType>Application</ConfigurationType> |
||||
<UseDebugLibraries>true</UseDebugLibraries> |
||||
<PlatformToolset>HIP</PlatformToolset> |
||||
<CharacterSet>Unicode</CharacterSet> |
||||
</PropertyGroup> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration"> |
||||
<ConfigurationType>Application</ConfigurationType> |
||||
<UseDebugLibraries>false</UseDebugLibraries> |
||||
<PlatformToolset>HIP</PlatformToolset> |
||||
<WholeProgramOptimization>true</WholeProgramOptimization> |
||||
<CharacterSet>Unicode</CharacterSet> |
||||
</PropertyGroup> |
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> |
||||
<ImportGroup Label="ExtensionSettings"> |
||||
<Import Condition="'$(HIPPropertiesImported)' != 'true'" Project="$(VCTargetsPath)\AMD.HIP.Common.props" /> |
||||
</ImportGroup> |
||||
<ImportGroup Label="Shared"> |
||||
</ImportGroup> |
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> |
||||
</ImportGroup> |
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> |
||||
</ImportGroup> |
||||
<PropertyGroup Label="UserMacros" /> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<LinkIncremental>true</LinkIncremental> |
||||
<TargetName>hip_$(ProjectName)</TargetName> |
||||
<CustomBuildBeforeTargets>ClCompile</CustomBuildBeforeTargets> |
||||
</PropertyGroup> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<LinkIncremental>false</LinkIncremental> |
||||
<TargetName>hip_$(ProjectName)</TargetName> |
||||
<CustomBuildBeforeTargets>ClCompile</CustomBuildBeforeTargets> |
||||
</PropertyGroup> |
||||
<PropertyGroup Label="HIP" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<TargetGPUArchitectures>gfx1030</TargetGPUArchitectures> |
||||
</PropertyGroup> |
||||
<PropertyGroup Label="HIP" Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<TargetGPUArchitectures>gfx1030</TargetGPUArchitectures> |
||||
</PropertyGroup> |
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<ClCompile> |
||||
<WarningLevel>Level2</WarningLevel> |
||||
<PreprocessorDefinitions>__HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions> |
||||
<LanguageStandard>stdcpp17</LanguageStandard> |
||||
<AdditionalIncludeDirectories>$(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> |
||||
</ClCompile> |
||||
<Link> |
||||
<SubSystem>Console</SubSystem> |
||||
<GenerateDebugInformation>true</GenerateDebugInformation> |
||||
<AdditionalDependencies>$(IntDir)main_device.obj;%(AdditionalDependencies)</AdditionalDependencies> |
||||
</Link> |
||||
<CustomBuild> |
||||
<Message>Compiling Device LLVM IR %(Identity)</Message> |
||||
<Command>"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa</Command> |
||||
<Outputs>$(IntDir)%(FileName).o</Outputs> |
||||
</CustomBuild> |
||||
<CustomBuildStep> |
||||
<Command>"$(ClangToolPath)clang-offload-bundler" -type=o -bundle-align=4096 -targets=host-x86_64-pc-windows-msvc,hipv4-amdgcn-amd-amdhsa--gfx803,hipv4-amdgcn-amd-amdhsa--gfx900,hipv4-amdgcn-amd-amdhsa--gfx906,hipv4-amdgcn-amd-amdhsa--gfx908,hipv4-amdgcn-amd-amdhsa--gfx90a,hipv4-amdgcn-amd-amdhsa--gfx1030 -input=nul "-input=$(IntDir)main_gfx803.o" "-input=$(IntDir)main_gfx900.o" "-input=$(IntDir)main_gfx906.o" "-input=$(IntDir)main_gfx908.o" "-input=$(IntDir)main_gfx90a.o" "-input=$(IntDir)main_gfx1030.o" "-output=$(IntDir)offload_bundle.hipfb" |
||||
cd $(IntDir) && "$(ClangToolPath)llvm-mc" -triple host-x86_64-pc-windows-msvc "hip_obj_gen_win.mcin" -o "main_device.obj" --filetype=obj</Command> |
||||
</CustomBuildStep> |
||||
<CustomBuildStep> |
||||
<Message>Generating Device Offload Object</Message> |
||||
</CustomBuildStep> |
||||
<CustomBuildStep> |
||||
<Outputs>$(IntDIr)main_device.obj</Outputs> |
||||
</CustomBuildStep> |
||||
<CustomBuildStep> |
||||
<!-- Inputs of the bundling step: the six per-architecture device objects plus the copy of hip_obj_gen_win.mcin that the CustomBuild item places in $(IntDir). --> |
||||
<Inputs>$(IntDir)main_gfx803.o;$(IntDir)main_gfx900.o;$(IntDir)main_gfx906.o;$(IntDir)main_gfx908.o;$(IntDir)main_gfx90a.o;$(IntDir)main_gfx1030.o;$(IntDir)hip_obj_gen_win.mcin;%(Inputs)</Inputs> |
||||
</CustomBuildStep> |
||||
</ItemDefinitionGroup> |
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<ClCompile> |
||||
<WarningLevel>Level2</WarningLevel> |
||||
<FunctionLevelLinking>true</FunctionLevelLinking> |
||||
<IntrinsicFunctions>true</IntrinsicFunctions> |
||||
<PreprocessorDefinitions>__HIP_ROCclr__;__clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions> |
||||
<LanguageStandard>stdcpp17</LanguageStandard> |
||||
<AdditionalIncludeDirectories>$(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> |
||||
</ClCompile> |
||||
<Link> |
||||
<SubSystem>Console</SubSystem> |
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding> |
||||
<OptimizeReferences>true</OptimizeReferences> |
||||
<GenerateDebugInformation>true</GenerateDebugInformation> |
||||
<AdditionalDependencies>$(IntDir)main_device.obj;%(AdditionalDependencies)</AdditionalDependencies> |
||||
</Link> |
||||
<CustomBuild> |
||||
<Message>Compiling Device LLVM IR %(Identity)</Message> |
||||
<Command>"$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa</Command> |
||||
<Outputs>$(IntDir)%(FileName).o</Outputs> |
||||
</CustomBuild> |
||||
<CustomBuildStep> |
||||
<!-- First command: clang-offload-bundler packs the per-architecture device objects into offload_bundle.hipfb. Every device target is written with an empty environment field, i.e. two consecutive hyphens before the GPU name, matching the other entries in this list. Second command: llvm-mc assembles hip_obj_gen_win.mcin into main_device.obj inside $(IntDir). --> |
||||
<Command>"$(ClangToolPath)clang-offload-bundler" -type=o -bundle-align=4096 -targets=host-x86_64-pc-windows-msvc,hipv4-amdgcn-amd-amdhsa--gfx803,hipv4-amdgcn-amd-amdhsa--gfx900,hipv4-amdgcn-amd-amdhsa--gfx906,hipv4-amdgcn-amd-amdhsa--gfx908,hipv4-amdgcn-amd-amdhsa--gfx90a,hipv4-amdgcn-amd-amdhsa--gfx1030 -input=NUL "-input=$(IntDir)main_gfx803.o" "-input=$(IntDir)main_gfx900.o" "-input=$(IntDir)main_gfx906.o" "-input=$(IntDir)main_gfx908.o" "-input=$(IntDir)main_gfx90a.o" "-input=$(IntDir)main_gfx1030.o" "-output=$(IntDir)offload_bundle.hipfb" |
||||
cd $(IntDir) && "$(ClangToolPath)llvm-mc" -triple host-x86_64-pc-windows-msvc "hip_obj_gen_win.mcin" -o "main_device.obj" --filetype=obj</Command> |
||||
</CustomBuildStep> |
||||
<CustomBuildStep> |
||||
<Message>Generating Device Offload Object</Message> |
||||
</CustomBuildStep> |
||||
<CustomBuildStep> |
||||
<Outputs>$(IntDIr)main_device.obj</Outputs> |
||||
</CustomBuildStep> |
||||
<CustomBuildStep> |
||||
<!-- Inputs of the bundling step: the six per-architecture device objects plus the copy of hip_obj_gen_win.mcin that the CustomBuild item places in $(IntDir). --> |
||||
<Inputs>$(IntDir)main_gfx803.o;$(IntDir)main_gfx900.o;$(IntDir)main_gfx906.o;$(IntDir)main_gfx908.o;$(IntDir)main_gfx90a.o;$(IntDir)main_gfx1030.o;$(IntDir)hip_obj_gen_win.mcin;%(Inputs)</Inputs> |
||||
</CustomBuildStep> |
||||
</ItemDefinitionGroup> |
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> |
||||
<ImportGroup Label="ExtensionTargets"> |
||||
<Import Condition="'$(HIPTargetsImported)' != 'true'" Project="$(VCTargetsPath)\AMD.HIP.Common.targets" /> |
||||
</ImportGroup> |
||||
</Project> |
||||
|
@ -1,101 +1,101 @@
@@ -1,101 +1,101 @@
|
||||
<?xml version="1.0" encoding="utf-8"?> |
||||
<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> |
||||
<ItemGroup Label="ProjectConfigurations"> |
||||
<ProjectConfiguration Include="Debug|x64"> |
||||
<Configuration>Debug</Configuration> |
||||
<Platform>x64</Platform> |
||||
</ProjectConfiguration> |
||||
<ProjectConfiguration Include="Release|x64"> |
||||
<Configuration>Release</Configuration> |
||||
<Platform>x64</Platform> |
||||
</ProjectConfiguration> |
||||
</ItemGroup> |
||||
<ItemGroup> |
||||
<ClCompile Include="main.hip" /> |
||||
</ItemGroup> |
||||
<ItemGroup> |
||||
<ClInclude Include="..\..\Common\example_utils.hpp" /> |
||||
</ItemGroup> |
||||
<PropertyGroup Label="Globals"> |
||||
<VCProjectVersion>15.0</VCProjectVersion> |
||||
<ProjectGuid>{ACC2A1E7-5865-4FAE-9016-E6EF73F8FA9E}</ProjectGuid> |
||||
<Keyword>Win32Proj</Keyword> |
||||
<RootNamespace>matrix_multiplication_vs2019</RootNamespace> |
||||
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion> |
||||
</PropertyGroup> |
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration"> |
||||
<ConfigurationType>Application</ConfigurationType> |
||||
<UseDebugLibraries>true</UseDebugLibraries> |
||||
<PlatformToolset>HIP</PlatformToolset> |
||||
<CharacterSet>Unicode</CharacterSet> |
||||
</PropertyGroup> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration"> |
||||
<ConfigurationType>Application</ConfigurationType> |
||||
<UseDebugLibraries>false</UseDebugLibraries> |
||||
<PlatformToolset>HIP</PlatformToolset> |
||||
<WholeProgramOptimization>true</WholeProgramOptimization> |
||||
<CharacterSet>Unicode</CharacterSet> |
||||
</PropertyGroup> |
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> |
||||
<ImportGroup Label="ExtensionSettings"> |
||||
<Import Condition="'$(HIPPropertiesImported)' != 'true'" Project="$(VCTargetsPath)\AMD.HIP.Common.props" /> |
||||
</ImportGroup> |
||||
<ImportGroup Label="Shared"> |
||||
</ImportGroup> |
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> |
||||
</ImportGroup> |
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> |
||||
</ImportGroup> |
||||
<PropertyGroup Label="UserMacros" /> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<LinkIncremental>true</LinkIncremental> |
||||
<TargetName>hip_$(ProjectName)</TargetName> |
||||
</PropertyGroup> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<LinkIncremental>false</LinkIncremental> |
||||
<TargetName>hip_$(ProjectName)</TargetName> |
||||
</PropertyGroup> |
||||
<PropertyGroup Label="HIP" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<TargetGPUArchitectures>gfx1030</TargetGPUArchitectures> |
||||
</PropertyGroup> |
||||
<PropertyGroup Label="HIP" Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<TargetGPUArchitectures>gfx1030</TargetGPUArchitectures> |
||||
</PropertyGroup> |
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<ClCompile> |
||||
<!-- Warning level kept identical to the Release configuration of this project. --> |
||||
<WarningLevel>Level2</WarningLevel> |
||||
<PreprocessorDefinitions>__HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions> |
||||
<LanguageStandard>stdcpp17</LanguageStandard> |
||||
<AdditionalIncludeDirectories>$(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> |
||||
<RuntimeTypeInfo>true</RuntimeTypeInfo> |
||||
</ClCompile> |
||||
<Link> |
||||
<SubSystem>Console</SubSystem> |
||||
<GenerateDebugInformation>true</GenerateDebugInformation> |
||||
</Link> |
||||
</ItemDefinitionGroup> |
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<ClCompile> |
||||
<WarningLevel>Level2</WarningLevel> |
||||
<FunctionLevelLinking>true</FunctionLevelLinking> |
||||
<IntrinsicFunctions>true</IntrinsicFunctions> |
||||
<PreprocessorDefinitions>__HIP_ROCclr__;__clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions> |
||||
<LanguageStandard>stdcpp17</LanguageStandard> |
||||
<AdditionalIncludeDirectories>$(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> |
||||
<RuntimeTypeInfo>true</RuntimeTypeInfo> |
||||
</ClCompile> |
||||
<Link> |
||||
<SubSystem>Console</SubSystem> |
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding> |
||||
<OptimizeReferences>true</OptimizeReferences> |
||||
<GenerateDebugInformation>true</GenerateDebugInformation> |
||||
</Link> |
||||
</ItemDefinitionGroup> |
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> |
||||
<ImportGroup Label="ExtensionTargets"> |
||||
<Import Condition="'$(HIPTargetsImported)' != 'true'" Project="$(VCTargetsPath)\AMD.HIP.Common.targets" /> |
||||
</ImportGroup> |
||||
</Project> |
||||
<?xml version="1.0" encoding="utf-8"?> |
||||
<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> |
||||
<ItemGroup Label="ProjectConfigurations"> |
||||
<ProjectConfiguration Include="Debug|x64"> |
||||
<Configuration>Debug</Configuration> |
||||
<Platform>x64</Platform> |
||||
</ProjectConfiguration> |
||||
<ProjectConfiguration Include="Release|x64"> |
||||
<Configuration>Release</Configuration> |
||||
<Platform>x64</Platform> |
||||
</ProjectConfiguration> |
||||
</ItemGroup> |
||||
<ItemGroup> |
||||
<ClCompile Include="main.hip" /> |
||||
</ItemGroup> |
||||
<ItemGroup> |
||||
<ClInclude Include="..\..\Common\example_utils.hpp" /> |
||||
</ItemGroup> |
||||
<PropertyGroup Label="Globals"> |
||||
<VCProjectVersion>15.0</VCProjectVersion> |
||||
<ProjectGuid>{ACC2A1E7-5865-4FAE-9016-E6EF73F8FA9E}</ProjectGuid> |
||||
<Keyword>Win32Proj</Keyword> |
||||
<RootNamespace>matrix_multiplication_vs2019</RootNamespace> |
||||
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion> |
||||
</PropertyGroup> |
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration"> |
||||
<ConfigurationType>Application</ConfigurationType> |
||||
<UseDebugLibraries>true</UseDebugLibraries> |
||||
<PlatformToolset>HIP</PlatformToolset> |
||||
<CharacterSet>Unicode</CharacterSet> |
||||
</PropertyGroup> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration"> |
||||
<ConfigurationType>Application</ConfigurationType> |
||||
<UseDebugLibraries>false</UseDebugLibraries> |
||||
<PlatformToolset>HIP</PlatformToolset> |
||||
<WholeProgramOptimization>true</WholeProgramOptimization> |
||||
<CharacterSet>Unicode</CharacterSet> |
||||
</PropertyGroup> |
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> |
||||
<ImportGroup Label="ExtensionSettings"> |
||||
<Import Condition="'$(HIPPropertiesImported)' != 'true'" Project="$(VCTargetsPath)\AMD.HIP.Common.props" /> |
||||
</ImportGroup> |
||||
<ImportGroup Label="Shared"> |
||||
</ImportGroup> |
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> |
||||
</ImportGroup> |
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> |
||||
</ImportGroup> |
||||
<PropertyGroup Label="UserMacros" /> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<LinkIncremental>true</LinkIncremental> |
||||
<TargetName>hip_$(ProjectName)</TargetName> |
||||
</PropertyGroup> |
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<LinkIncremental>false</LinkIncremental> |
||||
<TargetName>hip_$(ProjectName)</TargetName> |
||||
</PropertyGroup> |
||||
<PropertyGroup Label="HIP" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<TargetGPUArchitectures>gfx1030</TargetGPUArchitectures> |
||||
</PropertyGroup> |
||||
<PropertyGroup Label="HIP" Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<TargetGPUArchitectures>gfx1030</TargetGPUArchitectures> |
||||
</PropertyGroup> |
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
||||
<ClCompile> |
||||
<WarningLevel>Level2</WarningLevel> |
||||
<PreprocessorDefinitions>__HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions> |
||||
<LanguageStandard>stdcpp17</LanguageStandard> |
||||
<AdditionalIncludeDirectories>$(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> |
||||
<RuntimeTypeInfo>true</RuntimeTypeInfo> |
||||
</ClCompile> |
||||
<Link> |
||||
<SubSystem>Console</SubSystem> |
||||
<GenerateDebugInformation>true</GenerateDebugInformation> |
||||
</Link> |
||||
</ItemDefinitionGroup> |
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> |
||||
<ClCompile> |
||||
<WarningLevel>Level2</WarningLevel> |
||||
<FunctionLevelLinking>true</FunctionLevelLinking> |
||||
<IntrinsicFunctions>true</IntrinsicFunctions> |
||||
<PreprocessorDefinitions>__HIP_ROCclr__;__clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions> |
||||
<LanguageStandard>stdcpp17</LanguageStandard> |
||||
<AdditionalIncludeDirectories>$(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> |
||||
<RuntimeTypeInfo>true</RuntimeTypeInfo> |
||||
</ClCompile> |
||||
<Link> |
||||
<SubSystem>Console</SubSystem> |
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding> |
||||
<OptimizeReferences>true</OptimizeReferences> |
||||
<GenerateDebugInformation>true</GenerateDebugInformation> |
||||
</Link> |
||||
</ItemDefinitionGroup> |
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> |
||||
<ImportGroup Label="ExtensionTargets"> |
||||
<Import Condition="'$(HIPTargetsImported)' != 'true'" Project="$(VCTargetsPath)\AMD.HIP.Common.targets" /> |
||||
</ImportGroup> |
||||
</Project> |
||||
|
@ -0,0 +1,2 @@
@@ -0,0 +1,2 @@
|
||||
hip_module_api |
||||
module.co |
@ -0,0 +1,76 @@
@@ -0,0 +1,76 @@
|
||||
# MIT License |
||||
# |
||||
# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. |
||||
# |
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
# of this software and associated documentation files (the "Software"), to deal |
||||
# in the Software without restriction, including without limitation the rights |
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||
# copies of the Software, and to permit persons to whom the Software is |
||||
# furnished to do so, subject to the following conditions: |
||||
# |
||||
# The above copyright notice and this permission notice shall be included in all |
||||
# copies or substantial portions of the Software. |
||||
# |
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
||||
# SOFTWARE. |
||||
|
||||
# Declare the minimum CMake release first so policy defaults are in place
# before any other command runs.
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)

# Single name reused for the project, the executable target and the CTest entry.
set(example_name hip_module_api)
project("${example_name}" LANGUAGES CXX)
||||
|
||||
# Cache entry selecting the GPU runtime; it can be overridden on the command
# line with -DGPU_RUNTIME=<value>.
set(GPU_RUNTIME "HIP" CACHE STRING "Switches between HIP and CUDA")

# This example is HIP-only: stop configuring for any other runtime value.
if(NOT "${GPU_RUNTIME}" STREQUAL "HIP")
    set(runtime_error "GPU_RUNTIME is set to \"${GPU_RUNTIME}\".\nGPU_RUNTIME must be HIP.")
    message(FATAL_ERROR ${runtime_error})
endif()
||||
|
||||
# Enable the selected GPU language, then require language standard 17 with
# compiler-specific extensions switched off.
enable_language(${GPU_RUNTIME})
set(CMAKE_${GPU_RUNTIME}_STANDARD_REQUIRED ON)
set(CMAKE_${GPU_RUNTIME}_EXTENSIONS OFF)
set(CMAKE_${GPU_RUNTIME}_STANDARD 17)

# ROCm installation root. It becomes the package search prefix only when
# CMAKE_PREFIX_PATH has not been provided already.
set(ROCM_ROOT "/opt/rocm" CACHE PATH "Root directory of the ROCm installation")
if(NOT CMAKE_PREFIX_PATH)
    set(CMAKE_PREFIX_PATH "${ROCM_ROOT}")
endif()
||||
|
||||
# Produce one "--offload-arch=<arch>" compiler option per entry of
# CMAKE_HIP_ARCHITECTURES, leaving the source list itself untouched.
list(
    TRANSFORM CMAKE_HIP_ARCHITECTURES
    PREPEND "--offload-arch="
    OUTPUT_VARIABLE offload_archs
)
||||
|
||||
# Output path of the generated code object and the device source it is built from.
set(module "${CMAKE_CURRENT_BINARY_DIR}/module.co")
set(module_sources "${CMAKE_CURRENT_SOURCE_DIR}/module.hip")

# Collect the HIP flags that belong to the selected build type so that the
# manually compiled module receives them as well.
# CMAKE_BUILD_TYPE is a string and must be compared with STREQUAL; EQUAL is a
# numeric comparison and never matches a build type name.
# Under multi-config generators CMAKE_BUILD_TYPE is empty, so no branch is
# taken and module_flags stays empty.
set(module_flags "")
if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
    set(module_flags "${CMAKE_HIP_FLAGS} ${CMAKE_HIP_FLAGS_DEBUG}")
elseif("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
    set(module_flags "${CMAKE_HIP_FLAGS} ${CMAKE_HIP_FLAGS_RELEASE}")
elseif("${CMAKE_BUILD_TYPE}" STREQUAL "MinSizeRel")
    set(module_flags "${CMAKE_HIP_FLAGS} ${CMAKE_HIP_FLAGS_MINSIZEREL}")
elseif("${CMAKE_BUILD_TYPE}" STREQUAL "RelWithDebInfo")
    set(module_flags "${CMAKE_HIP_FLAGS} ${CMAKE_HIP_FLAGS_RELWITHDEBINFO}")
endif()

# The flag variables are space-separated strings (e.g. "-O3 -DNDEBUG"). Split
# them into a list so each flag is handed to the compiler as its own argument
# instead of one argument containing spaces.
separate_arguments(module_flags NATIVE_COMMAND "${module_flags}")
||||
|
||||
# Build rule for the code object: run the HIP compiler on the module source
# for every requested offload architecture, device code only.
# VERBATIM makes argument escaping identical on every platform and generator,
# so paths or flags with special characters reach the compiler unchanged.
add_custom_command(
    OUTPUT ${module}
    COMMAND
        ${CMAKE_HIP_COMPILER} ${module_flags} ${module_sources} ${offload_archs}
        --cuda-device-only -o ${module}
    DEPENDS ${module_sources}
    COMMENT "Compiling HIP code object module.co"
    VERBATIM
)

# Attach the code object to the default build (ALL) so it is generated
# whenever the project is built.
add_custom_target(module ALL DEPENDS ${module})
||||
|
||||
# Host executable of the example.
add_executable(${example_name} main.hip)

# Make example runnable using ctest.
# The NAME/COMMAND signature is used so that CMake replaces the target name
# with the location of the built executable; the legacy two-argument form
# performs no such substitution.
add_test(NAME ${example_name} COMMAND ${example_name})

# Add the Common directory two levels above this example to the include path.
set(include_dirs "../../Common")
target_include_directories(${example_name} PRIVATE ${include_dirs})

# Tell CMake to treat main.hip as a source file of the selected GPU language.
set_source_files_properties(main.hip PROPERTIES LANGUAGE ${GPU_RUNTIME})
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue