Add a cudaMallocAsync test
nouiz committed May 18, 2021
1 parent 0a0d062 commit 2f856ab
Showing 2 changed files with 31 additions and 1 deletion.
29 changes: 28 additions & 1 deletion tensorflow/core/common_runtime/gpu/gpu_device_test.cc
@@ -71,14 +71,16 @@ class GPUDeviceTest : public ::testing::Test {
       const string& visible_device_list = "",
       double per_process_gpu_memory_fraction = 0, int gpu_device_count = 1,
       const std::vector<std::vector<float>>& memory_limit_mb = {},
-      const std::vector<std::vector<int32>>& priority = {}) {
+      const std::vector<std::vector<int32>>& priority = {},
+      const bool use_cuda_malloc_async = false) {
     SessionOptions options;
     ConfigProto* config = &options.config;
     (*config->mutable_device_count())["GPU"] = gpu_device_count;
     GPUOptions* gpu_options = config->mutable_gpu_options();
     gpu_options->set_visible_device_list(visible_device_list);
     gpu_options->set_per_process_gpu_memory_fraction(
         per_process_gpu_memory_fraction);
+    gpu_options->set_use_cuda_malloc_async(use_cuda_malloc_async);
     for (int i = 0; i < memory_limit_mb.size(); ++i) {
       auto virtual_devices =
           gpu_options->mutable_experimental()->add_virtual_devices();
@@ -114,6 +116,31 @@ class GPUDeviceTest : public ::testing::Test {
   }
 };

+TEST_F(GPUDeviceTest, CudaMallocAsync) {
+  SessionOptions opts = MakeSessionOptions("0", 0, 1, {}, {},
+                                           /*use_cuda_malloc_async=*/true);
+  std::vector<std::unique_ptr<Device>> devices;
+  Status status;
+  {  // The new scope is to trigger the destruction of the object.
+    status = DeviceFactory::GetFactory("GPU")->CreateDevices(
+        opts, kDeviceNamePrefix, &devices);
+    EXPECT_EQ(devices.size(), 1);
+    Device* device = devices[0].get();
+    auto* device_info = device->tensorflow_gpu_device_info();
+    CHECK(device_info);
+    DeviceContext* device_context = device_info->default_context;
+
+    AllocatorAttributes allocator_attributes = AllocatorAttributes();
+    allocator_attributes.set_gpu_compatible(true);
+    Allocator* allocator = devices[0]->GetAllocator(allocator_attributes);
+    void* ptr = allocator->AllocateRaw(Allocator::kAllocatorAlignment,
+                                       1024);
+    EXPECT_NE(ptr, nullptr);
+    allocator->DeallocateRaw(ptr);
+  }
+  EXPECT_EQ(status.code(), error::OK);
+}
+
 TEST_F(GPUDeviceTest, FailedToParseVisibleDeviceList) {
   SessionOptions opts = MakeSessionOptions("0,abc");
   std::vector<std::unique_ptr<Device>> devices;
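For readers unfamiliar with the underlying CUDA feature: the new option switches TensorFlow's GPU allocator over to CUDA's stream-ordered allocation API (available since CUDA 11.2). A minimal standalone sketch of that API, not taken from this commit (the helper name `AllocateOnStream` is illustrative only), looks roughly like this:

```cpp
#include <cuda_runtime_api.h>

// Sketch only: allocate and free device memory with the stream-ordered
// allocator that use_cuda_malloc_async builds on.
void AllocateOnStream(cudaStream_t stream) {
  void* ptr = nullptr;
  // Request 1 KiB; the allocation is ordered with respect to `stream`.
  if (cudaMallocAsync(&ptr, 1024, stream) == cudaSuccess) {
    // ... enqueue kernels that use `ptr` on `stream` ...
    cudaFreeAsync(ptr, stream);  // The free is also stream-ordered.
  }
}
```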
3 changes: 3 additions & 0 deletions tensorflow/core/protobuf/config.proto
@@ -203,6 +203,9 @@ message GPUOptions {
   // to API stability guarantees in
   // https://www.tensorflow.org/guide/version_compat.
   Experimental experimental = 9;
+
+  // When true, use CUDA cudaMallocAsync API instead of TF gpu allocator.
+  bool use_cuda_malloc_async = 10;
 }

 // Options passed to the graph optimizer
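As a usage note (not part of the commit): once the proto field above exists, the generated setter can be used anywhere a ConfigProto is built, for example when creating a session from C++. A minimal sketch, assuming the standard C++ session API:

```cpp
#include <memory>

#include "tensorflow/core/public/session.h"
#include "tensorflow/core/public/session_options.h"

int main() {
  tensorflow::SessionOptions options;
  // Setter generated from the new GPUOptions field added above.
  options.config.mutable_gpu_options()->set_use_cuda_malloc_async(true);

  // GPU devices created for this session should then use the
  // cudaMallocAsync-based allocator instead of TF's default GPU allocator.
  std::unique_ptr<tensorflow::Session> session(tensorflow::NewSession(options));
  return 0;
}
```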
