You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
283 lines
11 KiB
283 lines
11 KiB
// MIT License |
|
// |
|
// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. |
|
// |
|
// Permission is hereby granted, free of charge, to any person obtaining a copy |
|
// of this software and associated documentation files (the "Software"), to deal |
|
// in the Software without restriction, including without limitation the rights |
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|
// copies of the Software, and to permit persons to whom the Software is |
|
// furnished to do so, subject to the following conditions: |
|
// |
|
// The above copyright notice and this permission notice shall be included in all |
|
// copies or substantial portions of the Software. |
|
// |
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|
// SOFTWARE. |
|
|
|
#include "cmdparser.hpp" |
|
#include "example_utils.hpp" |
|
|
|
#include <hip/hip_runtime.h> |
|
|
|
// Workaround for ROCm on Windows not including `__half` definitions, in a host compiler. |
|
#if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP__) && (defined(WIN32) || defined(_WIN32)) |
|
#include <hip/amd_detail/hip_fp16_gcc.h> |
|
#endif |
|
#include <rocrand/rocrand.hpp> |
|
|
|
#include <chrono> |
|
#include <iostream> |
|
#include <random> |
|
#include <string_view> |
|
#include <vector> |
|
|
|
// An anonymous namespace gives its contents internal linkage.
// This means that the contained definitions are only visible
// in the current translation unit (i.e. this cpp source file).
|
namespace |
|
{ |
|
|
|
/// \brief Identifies which random distribution was requested on the command line.
enum class Distribution
{
    /// Uniform integer distribution (command line value "uniform_int").
    uniform_int,
    /// Uniform real distribution (command line value "uniform_real").
    uniform_real,
    /// Normal distribution (command line value "normal").
    normal,
    /// Poisson distribution (command line value "poisson").
    poisson,
};
|
|
|
/// \brief Selects the device (GPU) with the provided ID. If it cannot be selected |
|
/// (e.g. a non-existent device ID is passed), an exception is thrown. |
|
/// Otherwise, the name of the device is queried and printed to the standard output. |
|
void set_device(const int device_id) |
|
{ |
|
HIP_CHECK(hipSetDevice(device_id)); |
|
hipDeviceProp_t properties; |
|
HIP_CHECK(hipGetDeviceProperties(&properties, device_id)); |
|
std::cout << "Device is set to \"" << properties.name << "\"" << std::endl; |
|
} |
|
|
|
/// \brief Generates a random vector of type \p T on the device (GPU) with the size of \p size |
|
/// using random distribution \p Distribution. |
|
/// \p Distribution must be a rocRAND distribution. |
|
/// The data is generated into device memory, and then is copied to an \p std::vector. |
|
template<typename T, typename Distribution> |
|
std::vector<T> generate_random_vector_on_device(const size_t size) |
|
{ |
|
// Instantiating a rocRAND C++ engine object takes care of initialization. |
|
rocrand_cpp::default_random_engine engine; |
|
|
|
// The same is true about a rocRAND distribution. |
|
Distribution distribution; |
|
|
|
// Allocate the requested amount of device memory. |
|
T* device_vector{}; |
|
HIP_CHECK(hipMalloc(&device_vector, size * sizeof(T))) |
|
|
|
// `operator()` of the distribution generates the requested count of random numbers |
|
// into the provided memory location, using the provided random engine. |
|
distribution(engine, device_vector, size); |
|
|
|
// Allocate host memory. |
|
std::vector<T> host_vector(size); |
|
|
|
// Copy the device memory to the host. This call synchronizes the device execution with the host's. |
|
HIP_CHECK( |
|
hipMemcpy(host_vector.data(), device_vector, size * sizeof(T), hipMemcpyDeviceToHost)); |
|
|
|
// Free up the device memory allocated earlier. |
|
HIP_CHECK(hipFree(device_vector)); |
|
|
|
return host_vector; |
|
} |
|
|
|
/// \brief Produces \p size random values of type \p T on the host (CPU),
/// drawing each value from a default-constructed \p Distribution.
/// \p Distribution must satisfy the standard RandomNumberDistribution requirements.
/// A default-constructed \p std::default_random_engine is used as the source of randomness.
/// \param size The number of values to generate.
/// \return A vector holding the \p size generated values, in generation order.
template<typename T, typename Distribution>
std::vector<T> generate_random_vector_on_host(const size_t size)
{
    // Default-construct both the standard engine and the requested distribution.
    std::default_random_engine random_engine;
    Distribution               value_distribution;

    // Allocate the result up front, then overwrite every element, front to back,
    // with the next value drawn from the distribution.
    std::vector<T> values(size);
    for(T& value : values)
    {
        value = value_distribution(random_engine);
    }

    return values;
}
|
|
|
/// \brief Generates a random vector of type \p T with the size of \p size both on the device (GPU) |
|
/// using the distribution \p DeviceDistribution and on the host (CPU) using the distribution |
|
/// \p HostDistribution. If argument \p print is set, the generated values are printed to the |
|
/// standard output. The time it takes to fill the vectors with random values is measured and printed |
|
/// to the standard output for both the device and the host case. |
|
template<typename T, typename DeviceDistribution, typename HostDistribution> |
|
void compare_device_and_host_random_number_generation(const size_t size, const bool print) |
|
{ |
|
// This local function measures the time it takes to execute the passed function `operation`. |
|
// Also prints the resulting random vector if `print` is set to true. |
|
const auto measure_time_and_print = [=](const auto operation, const std::string_view title) |
|
{ |
|
// Record the time before and after invoking `operation`. |
|
const auto start = std::chrono::high_resolution_clock::now(); |
|
const auto result_vector = operation(); |
|
const auto end = std::chrono::high_resolution_clock::now(); |
|
|
|
// Convert and print the duration in milliseconds. |
|
const std::chrono::duration<double, std::milli> duration_ms = end - start; |
|
std::cout << title << " took " << duration_ms.count() << " ms" << std::endl; |
|
if(print) |
|
{ |
|
// Print the list of space-delimited values |
|
for(const auto val : result_vector) |
|
{ |
|
std::cout << val << ' '; |
|
} |
|
std::cout << std::endl; |
|
} |
|
}; |
|
|
|
// Run and measure the random number generation on the device. |
|
measure_time_and_print( |
|
[size]() { return generate_random_vector_on_device<T, DeviceDistribution>(size); }, |
|
"Random number generation on the device"); |
|
|
|
// Run and measure the random number generation on the host. |
|
measure_time_and_print([size]() |
|
{ return generate_random_vector_on_host<T, HostDistribution>(size); }, |
|
"Random number generation on the host"); |
|
} |
|
|
|
/// \brief Executes the random number generation on both the device (GPU) and host (CPU) |
|
/// based on which \p Distribution was selected on the command line. |
|
void dispatch_distribution_type(const Distribution dist, const size_t size, const bool print) |
|
{ |
|
// Based on the passed `Distribution`, select the appropriate template arguments |
|
// to invoke `compare_device_and_host_random_number_generation` with. |
|
switch(dist) |
|
{ |
|
case Distribution::uniform_int: |
|
compare_device_and_host_random_number_generation< |
|
unsigned int, |
|
rocrand_cpp::uniform_int_distribution<unsigned int>, |
|
std::uniform_int_distribution<unsigned int>>(size, print); |
|
break; |
|
case Distribution::uniform_real: |
|
compare_device_and_host_random_number_generation< |
|
float, |
|
rocrand_cpp::uniform_real_distribution<float>, |
|
std::uniform_real_distribution<float>>(size, print); |
|
break; |
|
case Distribution::normal: |
|
compare_device_and_host_random_number_generation< |
|
double, |
|
rocrand_cpp::normal_distribution<double>, |
|
std::normal_distribution<double>>(size, print); |
|
break; |
|
case Distribution::poisson: |
|
compare_device_and_host_random_number_generation< |
|
unsigned int, |
|
rocrand_cpp::poisson_distribution<unsigned int>, |
|
std::poisson_distribution<unsigned int>>(size, print); |
|
default: break; |
|
} |
|
} |
|
|
|
/// \brief Registers the optional command line arguments of this example on \p parser,
/// together with their default values and help texts:
/// "device" (0), "distribution" ("uniform_int"), "size" (10000000) and "print" (off).
void configure_parser(cli::Parser& parser)
{
    // Defaults used when the corresponding argument is not given.
    constexpr int    default_device_id = 0;
    constexpr size_t default_size      = 10000000;
    constexpr bool   default_print     = false;

    // ID of the device (GPU) to run on.
    parser.set_optional<int>("device", "device", default_device_id, "Device Id");

    // Name of the distribution to generate values with.
    parser.set_optional<std::string>("distribution",
                                     "distribution",
                                     "uniform_int",
                                     "rocRAND distribution");

    // Number of random values to generate.
    parser.set_optional<size_t>("size", "size", default_size, "Problem size");

    // Flag controlling whether the generated values are printed.
    parser.set_optional<bool>(
        "print",
        "print",
        default_print,
        "Toggle printing on or off. This is a boolean argument and takes no value. If it is "
        "provided the value is set to \"on\"");
}
|
|
|
/// \brief Converts the textual distribution name given on the command line to the
/// corresponding \p Distribution enumerator.
/// Accepted names are "uniform_int", "uniform_real", "normal" and "poisson".
/// For any other value an error message is written to the standard error stream and the
/// program is terminated with \p error_exit_code.
/// \param distribution_arg The distribution name to convert.
/// \return The enumerator matching \p distribution_arg.
Distribution get_distribution(const std::string& distribution_arg)
{
    // Return as soon as a name matches, so no partially initialized result is ever needed.
    if(distribution_arg == "uniform_int")
    {
        return Distribution::uniform_int;
    }
    if(distribution_arg == "uniform_real")
    {
        return Distribution::uniform_real;
    }
    if(distribution_arg == "normal")
    {
        return Distribution::normal;
    }
    if(distribution_arg == "poisson")
    {
        return Distribution::poisson;
    }

    // Unknown name: report it and terminate, there is nothing sensible to return.
    std::cerr << distribution_arg << " is not a valid distribution." << std::endl;
    exit(error_exit_code);
}
|
|
|
} // namespace |
|
|
|
int main(const int argc, const char** argv) |
|
{ |
|
|
|
// Get the number of hip devices in the system |
|
int number_of_devies = 0; |
|
HIP_CHECK(hipGetDeviceCount(&number_of_devies)) |
|
|
|
if(number_of_devies <= 0) |
|
{ |
|
std::cerr << "HIP supported devices not found!" |
|
<< "\n"; |
|
exit(error_exit_code); |
|
} |
|
|
|
// Parse user inputs |
|
cli::Parser parser(argc, argv); |
|
configure_parser(parser); |
|
parser.run_and_exit_if_error(); |
|
|
|
// Get user arguments, if provided. |
|
const int device_id = parser.get<int>("device"); |
|
if(device_id < 0 || device_id >= number_of_devies) |
|
{ |
|
std::cerr << "Invalid device id " << device_id << "!\n" |
|
<< "Device does not exist\n"; |
|
exit(error_exit_code); |
|
} |
|
|
|
Distribution distribution = get_distribution(parser.get<std::string>("distribution")); |
|
size_t size = parser.get<size_t>("size"); |
|
bool print = parser.get<bool>("print"); |
|
|
|
// Set up the used device (GPU) according to the command line supplied argument. |
|
set_device(device_id); |
|
|
|
// Run the selected measurement on the device (GPU) and host (CPU). |
|
dispatch_distribution_type(distribution, size, print); |
|
}
|
|
|