Skip to content

Commit

Permalink
Output only maximum memory per device
Browse files Browse the repository at this point in the history
  • Loading branch information
yenong-amd committed May 18, 2023
1 parent 0bf0979 commit 4f24b81
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 6 deletions.
30 changes: 27 additions & 3 deletions clients/common/utility.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -352,10 +352,14 @@ void rocblas_local_handle::rocblas_stream_end_capture()
#endif
}

/*!
 * Per-device worker used when rocBLAS is initialized on several devices in
 * parallel. It selects HIP device `id`, calls rocblas_initialize(), and writes
 * into `memory_used` the difference between the first value reported by
 * hipMemGetInfo() before initialization and the one reported after it.
 *
 * \param id           HIP device ordinal passed to hipSetDevice().
 * \param memory_used  Output: before_init - after_init, in the units
 *                     hipMemGetInfo() reports (presumably bytes of free
 *                     device memory -- confirm against the HIP documentation).
 *
 * NOTE(review): the two signature lines below are the before/after pair from
 * the commit's diff view; the second one (with `memory_used`) is the version
 * introduced by this change.
 */
void rocblas_parallel_initialize_thread(int id)
void rocblas_parallel_initialize_thread(int id, size_t& memory_used)
{
// total_memory is required by the hipMemGetInfo() signature but is not used afterwards.
size_t before_init, after_init, total_memory;
CHECK_HIP_ERROR(hipSetDevice(id));
// Snapshot taken immediately before the library is initialized on this device.
CHECK_HIP_ERROR(hipMemGetInfo(&before_init, &total_memory));
rocblas_initialize();
// Second snapshot; the drop between the two is attributed to rocblas_initialize().
CHECK_HIP_ERROR(hipMemGetInfo(&after_init, &total_memory));
// NOTE(review): both operands are size_t, so if after_init ever exceeds before_init
// (e.g. something else releases device memory in between -- TODO confirm whether that
// can happen here) the subtraction wraps to a very large value instead of going negative.
memory_used = before_init - after_init;
}

/*!
Expand All @@ -369,17 +373,26 @@ void rocblas_parallel_initialize_thread(int id)
*/
void rocblas_parallel_initialize(int parallel_devices)
{
auto thread = std::make_unique<std::thread[]>(parallel_devices);
auto thread = std::make_unique<std::thread[]>(parallel_devices);
std::vector<size_t> init_memory(parallel_devices);

// Store the start timepoint of rocblas initialize
auto start_time = std::chrono::steady_clock::now();

if(parallel_devices == 1)
{
size_t before_init, after_init, total_memory;
CHECK_HIP_ERROR(hipMemGetInfo(&before_init, &total_memory));
rocblas_initialize();
CHECK_HIP_ERROR(hipMemGetInfo(&after_init, &total_memory));
init_memory[0] = before_init - after_init;
}
else
{

for(int id = 0; id < parallel_devices; ++id)
thread[id] = std::thread(rocblas_parallel_initialize_thread, id);
thread[id]
= std::thread(rocblas_parallel_initialize_thread, id, std::ref(init_memory[id]));
for(int id = 0; id < parallel_devices; ++id)
thread[id].join();
}
Expand Down Expand Up @@ -410,4 +423,15 @@ void rocblas_parallel_initialize(int parallel_devices)
rocblas_cerr << "\nrocBLAS info: average time to initialize each device exceeded the max "
"duration of "
<< max_duration << " milliseconds. Check CPU's load metrics." << std::endl;

constexpr static float max_memory = 1.0;
auto max_library_size
= *std::max_element(std::begin(init_memory), std::end(init_memory)) * 1.0e-9;

rocblas_cout << "\nrocBLAS info: maximum library size per device is " << max_library_size
<< " GB." << std::endl;
if(max_library_size > max_memory)
rocblas_cerr << "\nrocBLAS info: max kernel library size " << max_library_size
<< " GB exceeds the max recommended memory " << max_memory
<< " GB. Check library logic file sizes." << std::endl;
}
4 changes: 1 addition & 3 deletions library/src/tensile_host.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -701,14 +701,12 @@ namespace
int g = glob(dir.c_str(), GLOB_NOSORT, nullptr, &glob_result);
if(!g)
{
const char* experimental = getenv("ROCBLAS_TENSILE_EXPERIMENTAL_SELECTION");
for(size_t i = 0; i < glob_result.gl_pathc; ++i)
{
std::string cofile = glob_result.gl_pathv[i];
if(!skip_xnack.empty() && cofile.find(skip_xnack) != std::string::npos)
continue;
if((experimental == nullptr || experimental[0] == '\0')
&& cofile.find("Experimental") != std::string::npos)
if(cofile.find("Experimental") != std::string::npos)
continue;
adapter.loadCodeObjectFile(cofile);
}
Expand Down

0 comments on commit 4f24b81

Please sign in to comment.