You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
248 lines
10 KiB
248 lines
10 KiB
// MIT License |
|
// |
|
// Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. |
|
// |
|
// Permission is hereby granted, free of charge, to any person obtaining a copy |
|
// of this software and associated documentation files (the "Software"), to deal |
|
// in the Software without restriction, including without limitation the rights |
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|
// copies of the Software, and to permit persons to whom the Software is |
|
// furnished to do so, subject to the following conditions: |
|
// |
|
// The above copyright notice and this permission notice shall be included in all |
|
// copies or substantial portions of the Software. |
|
// |
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|
// SOFTWARE. |
|
|
|
#include "cmdparser.hpp"
#include "example_utils.hpp"
#include "hipblas_utils.hpp"
#include "hipsolver_utils.hpp"

#include <hip/hip_runtime.h>
#include <hipblas/hipblas.h>
#include <hipsolver/hipsolver.h>

#include <cmath>
#include <iostream>
#include <limits>
#include <numeric>
#include <random>
#include <vector>
|
|
|
int main(const int argc, char* argv[]) |
|
{ |
|
// 1. Parse command line arguments. |
|
cli::Parser parser(argc, argv); |
|
parser.set_optional<int>("n", "n", 3, "Size of n x n input matrices"); |
|
parser.set_optional<int>("c", "batch_count", 2, "Number of matrices in the input batch"); |
|
parser.run_and_exit_if_error(); |
|
|
|
// Get the n x n matrices size. |
|
const int n = parser.get<int>("n"); |
|
if(n <= 0) |
|
{ |
|
std::cout << "Value of 'n' should be greater than 0" << std::endl; |
|
return error_exit_code; |
|
} |
|
const int lda = n; |
|
const int size_matrix = n * lda; |
|
|
|
// Get the batch size. |
|
const int batch_count = parser.get<int>("c"); |
|
if(batch_count <= 0) |
|
{ |
|
std::cout << "Batch size should be at least 1" << std::endl; |
|
return error_exit_code; |
|
} |
|
|
|
// 2. Allocate and initialize the host side inputs. |
|
std::vector<double> A(size_matrix * batch_count); // Input batch and resulting eigenvectors |
|
std::vector<double> W(n * batch_count); // Resulting eigenvalues |
|
|
|
// Random and symmetric initialization of the input batch matrices. |
|
std::default_random_engine generator; |
|
std::uniform_real_distribution<double> distribution(0., 2.); |
|
auto random_number = [&]() { return distribution(generator); }; |
|
|
|
for(int k = 0; k < batch_count * size_matrix; k += size_matrix) |
|
{ |
|
for(int i = 0; i < n; ++i) |
|
{ |
|
A[k + (lda + 1) * i] = random_number(); |
|
for(int j = 0; j < i; ++j) |
|
{ |
|
A[k + i * lda + j] = A[k + j * lda + i] = random_number(); |
|
} |
|
} |
|
} |
|
|
|
// 3. Allocate device memory and copy input data from host. |
|
double* d_A{}; |
|
double* d_W{}; |
|
int* d_info{}; |
|
|
|
HIP_CHECK(hipMalloc(&d_A, sizeof(double) * A.size())); |
|
HIP_CHECK(hipMalloc(&d_W, sizeof(double) * W.size())); |
|
HIP_CHECK(hipMalloc(&d_info, sizeof(int))); |
|
HIP_CHECK(hipMemcpy(d_A, A.data(), sizeof(double) * A.size(), hipMemcpyHostToDevice)); |
|
|
|
// 4. Initialize hipSOLVER by creating a handle. |
|
hipsolverHandle_t hipsolver_handle; |
|
HIPSOLVER_CHECK(hipsolverCreate(&hipsolver_handle)); |
|
|
|
// 5. Set parameters for hipSOLVER's syevjBatched function. |
|
const hipsolverEigMode_t jobz = HIPSOLVER_EIG_MODE_VECTOR; |
|
const hipsolverFillMode_t uplo = HIPSOLVER_FILL_MODE_LOWER; |
|
|
|
hipsolverSyevjInfo_t params; |
|
HIPSOLVER_CHECK(hipsolverCreateSyevjInfo(¶ms)); |
|
HIPSOLVER_CHECK(hipsolverXsyevjSetMaxSweeps(params, 15)); |
|
HIPSOLVER_CHECK(hipsolverXsyevjSetTolerance(params, 1.e-12)); |
|
HIPSOLVER_CHECK(hipsolverXsyevjSetSortEig(params, 1)); |
|
|
|
// 6. Query and allocate working space. |
|
int lwork{}; /* size of workspace in bytes */ |
|
double* d_work{}; /* device workspace */ |
|
HIPSOLVER_CHECK(hipsolverDsyevjBatched_bufferSize(hipsolver_handle, |
|
jobz, |
|
uplo, |
|
n, |
|
d_A, |
|
lda, |
|
d_W, |
|
&lwork, |
|
params, |
|
batch_count)); |
|
HIP_CHECK(hipMalloc(&d_work, lwork)); |
|
|
|
// 7. Invoke hipsolverDsyevjBatched to compute the eigenvalues (written to d_W) and |
|
// eigenvectors (written to d_A) of the matrices in the batch. |
|
HIPSOLVER_CHECK(hipsolverDsyevjBatched(hipsolver_handle, |
|
jobz, |
|
uplo, |
|
n, |
|
d_A, |
|
lda, |
|
d_W, |
|
d_work, |
|
lwork, |
|
d_info, |
|
params, |
|
batch_count)); |
|
// 8. Check returned info value. |
|
int info{}; |
|
HIP_CHECK(hipMemcpy(&info, d_info, sizeof(int), hipMemcpyDeviceToHost)); |
|
|
|
int errors{}; |
|
|
|
if(info < 0) |
|
{ |
|
std::cout << -info << "-th parameter is wrong.\n" << std::endl; |
|
errors++; |
|
} |
|
else if(info > 0) |
|
{ |
|
std::cout << "Computing eigenvalues did not converge.\n" << std::endl; |
|
errors++; |
|
} |
|
else |
|
{ |
|
// 9. Copy results back to host. Use auxiliary matrix X for copying eigenvectors. |
|
std::vector<double> X(size_matrix * batch_count); |
|
|
|
HIP_CHECK(hipMemcpy(X.data(), d_A, sizeof(double) * X.size(), hipMemcpyDeviceToHost)); |
|
HIP_CHECK(hipMemcpy(W.data(), d_W, sizeof(double) * W.size(), hipMemcpyDeviceToHost)); |
|
|
|
// 10. Print eigenvalues and check solution using the hipBLAS API for each matrix of the batch. |
|
// Copy original input matrix to device. |
|
HIP_CHECK(hipMemcpy(d_A, A.data(), sizeof(double) * A.size(), hipMemcpyHostToDevice)); |
|
|
|
// Define necessary constants and auxiliary matrices. |
|
const double eps = 1.0e5 * std::numeric_limits<double>::epsilon(); |
|
const double h_one = 1; |
|
const double h_minus_one = -1; |
|
double* d_accum{}; /* cumulative device matrix */ |
|
HIP_CHECK(hipMalloc(&d_accum, sizeof(double) * size_matrix)); |
|
|
|
// Create a handle and enable passing scalar parameters from a pointer to host memory. |
|
hipblasHandle_t hipblas_handle; |
|
HIPBLAS_CHECK(hipblasCreate(&hipblas_handle)); |
|
HIPBLAS_CHECK(hipblasSetPointerMode(hipblas_handle, HIPBLAS_POINTER_MODE_HOST)); |
|
|
|
for(int i = 0; i < batch_count; ++i) |
|
{ |
|
const int eigvals_offset = i * n; |
|
const int eigvect_offset = i * size_matrix; |
|
|
|
// 10a. Print eigenvalues of matrix i of the batch. |
|
std::cout << "Eigenvalues successfully computed for matrix " << i << " of the batch: " |
|
<< format_range(W.begin() + eigvals_offset, W.begin() + eigvals_offset + n) |
|
<< std::endl; |
|
|
|
// 10b. Check the solution by seeing if A_i * X_i - X_i * diag(W_i) is the zero matrix. |
|
// Firstly, make accum = X_i * diag(W_i). |
|
double* d_X{}; |
|
HIP_CHECK(hipMalloc(&d_X, sizeof(double) * size_matrix)); |
|
HIP_CHECK(hipMemcpy(d_X, |
|
X.data() + eigvect_offset, |
|
sizeof(double) * size_matrix, |
|
hipMemcpyHostToDevice)); |
|
HIPBLAS_CHECK(hipblasDdgmm(hipblas_handle, |
|
HIPBLAS_SIDE_RIGHT, |
|
n, |
|
n, |
|
d_X, |
|
lda, |
|
d_W + eigvals_offset, |
|
1, |
|
d_accum, |
|
lda)); |
|
|
|
// Secondly, make accum = A_i * X_i - accum. |
|
HIPBLAS_CHECK(hipblasDgemm(hipblas_handle, |
|
HIPBLAS_OP_N, |
|
HIPBLAS_OP_N, |
|
n, |
|
n, |
|
n, |
|
&h_one, |
|
d_A + eigvect_offset, |
|
lda, |
|
d_X, |
|
lda, |
|
&h_minus_one, |
|
d_accum, |
|
lda)); |
|
// Copy the result back to the host. |
|
HIP_CHECK(hipMemcpy(A.data() + eigvect_offset, |
|
d_accum, |
|
sizeof(double) * size_matrix, |
|
hipMemcpyDeviceToHost)); |
|
} |
|
// Free resources. |
|
HIP_CHECK(hipFree(d_accum)); |
|
HIPBLAS_CHECK(hipblasDestroy(hipblas_handle)); |
|
|
|
// Check if A is 0. |
|
for(size_t i = 0; i < A.size(); ++i) |
|
{ |
|
errors += std::fabs(A[i]) > eps; |
|
} |
|
} |
|
|
|
// 11. Clean up device allocations and print validation result. |
|
HIPSOLVER_CHECK(hipsolverDestroy(hipsolver_handle)); |
|
HIPSOLVER_CHECK(hipsolverDestroySyevjInfo(params)); |
|
HIP_CHECK(hipFree(d_A)); |
|
HIP_CHECK(hipFree(d_W)); |
|
HIP_CHECK(hipFree(d_work)); |
|
HIP_CHECK(hipFree(d_info)); |
|
|
|
return report_validation_result(errors); |
|
}
|
|
|