Add a cudaMallocAsync test
nouiz committed May 18, 2021
1 parent 0a0d062 commit 2f856ab
Showing 2 changed files with 31 additions and 1 deletion.
29 changes: 28 additions & 1 deletion tensorflow/core/common_runtime/gpu/gpu_device_test.cc
@@ -71,14 +71,16 @@ class GPUDeviceTest : public ::testing::Test {
       const string& visible_device_list = "",
       double per_process_gpu_memory_fraction = 0, int gpu_device_count = 1,
       const std::vector<std::vector<float>>& memory_limit_mb = {},
-      const std::vector<std::vector<int32>>& priority = {}) {
+      const std::vector<std::vector<int32>>& priority = {},
+      const bool use_cuda_malloc_async = false) {
     SessionOptions options;
     ConfigProto* config = &options.config;
     (*config->mutable_device_count())["GPU"] = gpu_device_count;
     GPUOptions* gpu_options = config->mutable_gpu_options();
     gpu_options->set_visible_device_list(visible_device_list);
     gpu_options->set_per_process_gpu_memory_fraction(
         per_process_gpu_memory_fraction);
+    gpu_options->set_use_cuda_malloc_async(use_cuda_malloc_async);
     for (int i = 0; i < memory_limit_mb.size(); ++i) {
       auto virtual_devices =
           gpu_options->mutable_experimental()->add_virtual_devices();
@@ -114,6 +116,31 @@ class GPUDeviceTest : public ::testing::Test {
   }
 };

+TEST_F(GPUDeviceTest, CudaMallocAsync) {
+  SessionOptions opts = MakeSessionOptions("0", 0, 1, {}, {},
+                                           /*use_cuda_malloc_async=*/true);
+  std::vector<std::unique_ptr<Device>> devices;
+  Status status;
+  {  // The new scope is to trigger the destruction of the object.
+    status = DeviceFactory::GetFactory("GPU")->CreateDevices(
+        opts, kDeviceNamePrefix, &devices);
+    EXPECT_EQ(devices.size(), 1);
+    Device* device = devices[0].get();
+    auto* device_info = device->tensorflow_gpu_device_info();
+    CHECK(device_info);
+    DeviceContext* device_context = device_info->default_context;
+
+    AllocatorAttributes allocator_attributes = AllocatorAttributes();
+    allocator_attributes.set_gpu_compatible(true);
+    Allocator* allocator = devices[0]->GetAllocator(allocator_attributes);
+    void* ptr = allocator->AllocateRaw(Allocator::kAllocatorAlignment,
+                                       1024);
+    EXPECT_NE(ptr, nullptr);
+    allocator->DeallocateRaw(ptr);
+  }
+  EXPECT_EQ(status.code(), error::OK);
+}
+
 TEST_F(GPUDeviceTest, FailedToParseVisibleDeviceList) {
   SessionOptions opts = MakeSessionOptions("0,abc");
   std::vector<std::unique_ptr<Device>> devices;
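For readers unfamiliar with the underlying CUDA feature: the new option switches TensorFlow's GPU allocator over to CUDA's stream-ordered allocation API (available since CUDA 11.2). A minimal standalone sketch of that API, not taken from this commit (the helper name `AllocateOnStream` is illustrative only), looks roughly like this:

```cpp
#include <cuda_runtime_api.h>

// Sketch only: allocate and free device memory with the stream-ordered
// allocator that use_cuda_malloc_async builds on.
void AllocateOnStream(cudaStream_t stream) {
  void* ptr = nullptr;
  // Request 1 KiB; the allocation is ordered with respect to `stream`.
  if (cudaMallocAsync(&ptr, 1024, stream) == cudaSuccess) {
    // ... enqueue kernels that use `ptr` on `stream` ...
    cudaFreeAsync(ptr, stream);  // The free is also stream-ordered.
  }
}
```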
3 changes: 3 additions & 0 deletions tensorflow/core/protobuf/config.proto
@@ -203,6 +203,9 @@ message GPUOptions {
   // to API stability guarantees in
   // https://www.tensorflow.org/guide/version_compat.
   Experimental experimental = 9;
+
+  // When true, use CUDA cudaMallocAsync API instead of TF gpu allocator.
+  bool use_cuda_malloc_async = 10;
 }

 // Options passed to the graph optimizer
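As a usage note (not part of the commit): once the proto field above exists, the generated setter can be used anywhere a ConfigProto is built, for example when creating a session from C++. A minimal sketch, assuming the standard C++ session API:

```cpp
#include <memory>

#include "tensorflow/core/public/session.h"
#include "tensorflow/core/public/session_options.h"

int main() {
  tensorflow::SessionOptions options;
  // Setter generated from the new GPUOptions field added above.
  options.config.mutable_gpu_options()->set_use_cuda_malloc_async(true);

  // GPU devices created for this session should then use the
  // cudaMallocAsync-based allocator instead of TF's default GPU allocator.
  std::unique_ptr<tensorflow::Session> session(tensorflow::NewSession(options));
  return 0;
}
```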
