[tfdbg2] Fix debugger-generated names for Placeholder and Const ops
to facilitate input tracing in the parts of a Graph that contain those ops.

1. tfdbg2's dumping callback used to rename Const and
   Placeholder/PlaceholderWithDefault ops to avoid duplicate (e.g., `None`)
   op names in the same graph. It turns out that a simpler way to solve this
   is to use the output tensor names of those ops, which are unique within
   a graph (see the naming sketch below).
2. tfdbg2 adds Identity ops to the graph under v1 graph mode. To allow
   tracing through these Identity ops, we create a new dict called
   `_tensor_aliases` that maps the names of those debugger-generated
   Identity ops to the names of the original instrumented graph tensors.

PiperOrigin-RevId: 309414953
Change-Id: I73c086e8faba849562142a6a3d246938ebd25811
caisq authored and tensorflower-gardener committed May 1, 2020
1 parent c469d6a commit 71336da
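
As background for point 1 of the commit message: every symbolic tensor in a
TensorFlow graph is named `<op_name>:<output_slot>`, and tensor names are
unique within a graph even when op-name attributes collide. The following is
a minimal illustrative sketch, not part of this commit (it assumes the
tf.compat.v1 API), showing how stripping the `:<slot>` suffix recovers a
graph-unique op name:

import tensorflow.compat.v1 as tf

graph = tf.Graph()
with graph.as_default():
  x = tf.placeholder(tf.float32, name="x")
  c = tf.constant(4.0)  # op name is auto-generated (here "Const")
  y = (x + c) / x

for tensor in (x, c, y):
  # A tensor named "x:0" belongs to the op named "x"; splitting off the
  # ":<output_slot>" suffix therefore yields a graph-unique op name.
  print(tensor.name, "->", tensor.name.split(":")[0])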
Showing 2 changed files with 83 additions and 8 deletions.
46 changes: 38 additions & 8 deletions tensorflow/python/debug/lib/dumping_callback.py
@@ -102,7 +102,12 @@ def __init__(self,
     self._function_to_graph_id = dict()
     self._op_type_to_context_id = dict()
     # Keeps track of counter for symbolic tensors output by in-graph ops.
+    # It is used to make unique names for debugger-generated tensors.
     self._symbolic_tensor_counter = 0
+    # A map from the names of debugger-generated Identity and DebugIdentityV2
+    # tensors to the names of the original instrumented graph tensors. This is
+    # applicable to v1 graph mode only.
+    self._tensor_aliases = dict()
     self._source_file_paths_lock = threading.Lock()
     self._stack_frame_to_id_lock = threading.Lock()
     self._context_lock = threading.Lock()
@@ -298,11 +303,15 @@ def _process_v1_graph_mode_tensor(self,
       # of Const ops can lead to downstream errors related to shapes. We opt
       # to use an identity op to avoid this issue at the cost of slightly
       # larger graph size.
+      self._tensor_aliases[debug_tensor.name] = tensor.name
       return debug_tensor
     else:
-      identity = array_ops.identity(tensor)
+      with self._symbolic_tensor_counter_lock:
+        identity_name = "tfdbg_identity_%d" % self._symbolic_tensor_counter
+      identity = array_ops.identity(tensor, name=identity_name)
       identity.op._add_control_input(  # pylint: disable=protected-access
           debug_tensor.op)
+      self._tensor_aliases[identity.name] = tensor.name
       return identity
 
   def _instrument_symbolic_tensors(self,
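
To make the v1-graph-mode pattern in the hunk above concrete, here is a rough
sketch, not part of the commit: a deterministically named Identity op stands
in for the original tensor, a control dependency forces the debug op to run
first, and the alias dict records the mapping back to the original tensor
name. The public tf.control_dependencies is used here in place of the private
Op._add_control_input call, and debug_stub stands in for the real
DebugIdentityV2 op:

import tensorflow.compat.v1 as tf

tensor_aliases = {}  # debugger-generated tensor name -> original tensor name

graph = tf.Graph()
with graph.as_default():
  tensor = tf.constant(1.0, name="original")
  # Stand-in for the DebugIdentityV2 op that the real callback inserts.
  debug_tensor = tf.identity(tensor, name="debug_stub")
  with tf.control_dependencies([debug_tensor.op]):
    identity = tf.identity(tensor, name="tfdbg_identity_0")
  tensor_aliases[identity.name] = tensor.name

print(tensor_aliases)  # {'tfdbg_identity_0:0': 'original:0'}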
@@ -354,6 +363,9 @@ def _instrument_symbolic_tensors(self,
           continue
         # Except in V1 graph mode + control flow, debug_identity_v2 triggers
         # auto control dependency because it's a stateful op.
+        with self._symbolic_tensor_counter_lock:
+          debug_identity_name = ("DebugIdentityV2_%d" %
+                                 self._symbolic_tensor_counter)
         debug_tensor = gen_debug_ops.debug_identity_v2(
             # Use an empty (shape=[0]) float32 tensor for the NO_TENSOR mode
             # as a low-overhead placeholder, since no actual tensor value is
@@ -363,7 +375,8 @@ def _instrument_symbolic_tensors(self,
             op_name=op_name,
             output_slot=output_slot,
             tensor_debug_mode=self._tensor_debug_mode,
-            debug_urls=debug_urls)
+            debug_urls=debug_urls,
+            name=debug_identity_name)
         if is_v1_graph_mode:
           instrumented_tensors.append(self._process_v1_graph_mode_tensor(
               op_type, tensor, debug_tensor, tensor_debug_mode))
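
The two hunks above derive the DebugIdentityV2 op's name from
`_symbolic_tensor_counter` while holding its lock, so that concurrently
instrumented tensors never race to the same name. Below is a minimal,
hypothetical sketch of that counter-under-lock naming scheme (in the real
callback the counter is advanced as part of tensor-ID assignment rather than
by the naming code itself):

import threading

class NameCounter:
  """Generates "<prefix>_<N>" names safely across threads."""

  def __init__(self):
    self._counter = 0
    self._lock = threading.Lock()

  def next_name(self, prefix):
    with self._lock:
      name = "%s_%d" % (prefix, self._counter)
      self._counter += 1
      return name

names = NameCounter()
print(names.next_name("DebugIdentityV2"))  # DebugIdentityV2_0
print(names.next_name("tfdbg_identity"))   # tfdbg_identity_1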
@@ -537,13 +550,12 @@ def callback(self,
       output_tensor_ids = self._get_symbolic_tensor_ids(len(outputs))
       if op_type in ("Const", "Placeholder", "PlaceholderWithDefault"):
         # In some cases, the op name of a Const or Placeholder op in a graph
-        # can be duplicate (e.g., with the name "resource").
-        # When this happens, we give the op a debugger-generated name
-        # in order to prevent problems and check failures down the pipe.
-        op_name = "%s_%d" % (op_name, self._symbolic_tensor_counter)
+        # can be duplicate (e.g., `None` or "resource").
+        # When this happens, we use the output tensor name to infer
+        # the non-duplicated tensor name.
+        op_name = outputs[0].name.split(":")[0]
       if is_v1_graph_mode:
         for input_tensor in inputs:
-          # TODO(cais):
           if input_tensor in self._placeholder_to_debug_tensor and outputs:
             outputs[0].op._add_control_input(  # pylint: disable=protected-access
                 self._placeholder_to_debug_tensor[input_tensor].op)
@@ -552,7 +564,9 @@ def callback(self,
           op_name=op_name,
           graph_name=graph.name if hasattr(graph, "name") else None,
           graph_id=context_id,
-          input_names=[input_tensor.name for input_tensor in inputs],
+          input_names=[
+              self._lookup_tensor_name(input_tensor) for input_tensor in inputs
+          ],
           num_outputs=len(outputs),
           output_tensor_ids=output_tensor_ids,
           code_location=self._process_stack_frames())
@@ -577,6 +591,22 @@ def callback(self,
         outputs, op_type, input_ids, output_tensor_device_ids,
         graph_id=context_id))
 
+  def _lookup_tensor_name(self, tensor):
+    """Look up the name of a graph tensor.
+
+    This method maps the name of a debugger-generated Identity or
+    DebugIdentityV2 tensor to the name of the original instrumented tensor,
+    if `tensor` is such a debugger-created tensor.
+    Otherwise, it returns the name of `tensor` as is.
+
+    Args:
+      tensor: The graph tensor to look up the name for.
+
+    Returns:
+      Name of the original instrumented tensor as known to the debugger.
+    """
+    return self._tensor_aliases.get(tensor.name, tensor.name)
+
   def _func_graph_id_from_func_name(self, op_type):
     """Attempt to get the ID of a FuncGraph based on an op type name.
45 changes: 45 additions & 0 deletions tensorflow/python/debug/lib/dumping_callback_test.py
@@ -1386,6 +1386,51 @@ def iterative_doubling(x, times):
       # The Mul and Sub ops are from the same innermost context.
       self.assertEqual(mul_op_digest.graph_id, sub_op_digest.graph_id)
 
+  @parameterized.named_parameters(
+      ("NoTensor", "NO_TENSOR"),
+      ("Shape", "SHAPE"),
+      ("FullTensor", "FULL_TENSOR"),
+  )
+  @test_util.run_in_graph_and_eager_modes
+  def testGraphInputTracingWorksWithConstAndPlaceholderTensors(
+      self, tensor_debug_mode):
+    writer = dumping_callback.enable_dump_debug_info(
+        self.dump_root, tensor_debug_mode=tensor_debug_mode)
+
+    @def_function.function
+    def func(x):
+      return (x + constant_op.constant(4.0)) / x
+
+    x = constant_op.constant(2.0)
+    self.assertAllClose(self.evaluate(func(x)), 3.0)
+    writer.FlushNonExecutionFiles()
+    writer.FlushExecutionFiles()
+
+    with debug_events_reader.DebugDataReader(self.dump_root) as reader:
+      reader.update()
+      graph_op_digests = reader.graph_op_digests()
+      placeholder_op_name = None
+      const_op_name = None
+      add_op_name = None
+      div_op_name = None
+      for op_digest in graph_op_digests:
+        if op_digest.op_type == "Placeholder":
+          placeholder_op_name = op_digest.op_name
+        elif op_digest.op_type == "Const":
+          const_op_name = op_digest.op_name
+        elif op_digest.op_type == "AddV2":
+          add_op_name = op_digest.op_name
+          self.assertLen(op_digest.input_names, 2)
+          self.assertEqual(op_digest.input_names[0], placeholder_op_name + ":0")
+          self.assertEqual(op_digest.input_names[1], const_op_name + ":0")
+        elif op_digest.op_type == "RealDiv":
+          div_op_name = op_digest.op_name
+          self.assertLen(op_digest.input_names, 2)
+          self.assertEqual(op_digest.input_names[0], add_op_name + ":0")
+          self.assertEqual(op_digest.input_names[1], placeholder_op_name + ":0")
+      self.assertTrue(add_op_name)
+      self.assertTrue(div_op_name)
+
 
 if __name__ == "__main__":
   ops.enable_eager_execution()
