[tfdbg2] Fix debugger-generated names for Placeholder and Const ops
to facilitate input tracing in the parts of a Graph that contain those ops.

1. tfdbg2's dumping callback used to rename Const and
   Placeholder/PlaceholderWithDefault ops to avoid duplicate (e.g., `None`)
   op names in the same graph. It turns out that a simpler way to solve this
   is to use the output tensor names of those ops, which are unique within
   a graph (see the naming sketch below).
2. tfdbg2 adds Identity ops to the graph under v1 graph mode. To allow
   tracing through these Identity ops, we create a new dict called
   `_tensor_aliases` that maps the names of those debugger-generated
   Identity ops to the names of the original instrumented graph tensors.

PiperOrigin-RevId: 309414953
Change-Id: I73c086e8faba849562142a6a3d246938ebd25811
caisq authored and tensorflower-gardener committed May 1, 2020
1 parent c469d6a commit 71336da
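
As background for point 1 of the commit message: every symbolic tensor in a
TensorFlow graph is named `<op_name>:<output_slot>`, and tensor names are
unique within a graph even when op-name attributes collide. The following is
a minimal illustrative sketch, not part of this commit (it assumes the
tf.compat.v1 API), showing how stripping the `:<slot>` suffix recovers a
graph-unique op name:

import tensorflow.compat.v1 as tf

graph = tf.Graph()
with graph.as_default():
  x = tf.placeholder(tf.float32, name="x")
  c = tf.constant(4.0)  # op name is auto-generated (here "Const")
  y = (x + c) / x

for tensor in (x, c, y):
  # A tensor named "x:0" belongs to the op named "x"; splitting off the
  # ":<output_slot>" suffix therefore yields a graph-unique op name.
  print(tensor.name, "->", tensor.name.split(":")[0])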
Showing 2 changed files with 83 additions and 8 deletions.
46 changes: 38 additions & 8 deletions tensorflow/python/debug/lib/dumping_callback.py
@@ -102,7 +102,12 @@ def __init__(self,
     self._function_to_graph_id = dict()
     self._op_type_to_context_id = dict()
     # Keeps track of counter for symbolic tensors output by in-graph ops.
+    # It is used to make unique names for debugger-generated tensors.
     self._symbolic_tensor_counter = 0
+    # A map from the names of debugger-generated Identity and DebugIdentityV2
+    # tensors to the names of the original instrumented graph tensors. This is
+    # applicable to v1 graph mode only.
+    self._tensor_aliases = dict()
     self._source_file_paths_lock = threading.Lock()
     self._stack_frame_to_id_lock = threading.Lock()
     self._context_lock = threading.Lock()
@@ -298,11 +303,15 @@ def _process_v1_graph_mode_tensor(self,
       # of Const ops can lead to downstream errors related to shapes. We opt
       # to use an identity op to avoid this issue at the cost of slightly
       # larger graph size.
+      self._tensor_aliases[debug_tensor.name] = tensor.name
       return debug_tensor
     else:
-      identity = array_ops.identity(tensor)
+      with self._symbolic_tensor_counter_lock:
+        identity_name = "tfdbg_identity_%d" % self._symbolic_tensor_counter
+      identity = array_ops.identity(tensor, name=identity_name)
       identity.op._add_control_input(  # pylint: disable=protected-access
           debug_tensor.op)
+      self._tensor_aliases[identity.name] = tensor.name
       return identity
 
   def _instrument_symbolic_tensors(self,
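
To make the v1-graph-mode pattern in the hunk above concrete, here is a rough
sketch, not part of the commit: a deterministically named Identity op stands
in for the original tensor, a control dependency forces the debug op to run
first, and the alias dict records the mapping back to the original tensor
name. The public tf.control_dependencies is used here in place of the private
Op._add_control_input call, and debug_stub stands in for the real
DebugIdentityV2 op:

import tensorflow.compat.v1 as tf

tensor_aliases = {}  # debugger-generated tensor name -> original tensor name

graph = tf.Graph()
with graph.as_default():
  tensor = tf.constant(1.0, name="original")
  # Stand-in for the DebugIdentityV2 op that the real callback inserts.
  debug_tensor = tf.identity(tensor, name="debug_stub")
  with tf.control_dependencies([debug_tensor.op]):
    identity = tf.identity(tensor, name="tfdbg_identity_0")
  tensor_aliases[identity.name] = tensor.name

print(tensor_aliases)  # {'tfdbg_identity_0:0': 'original:0'}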
@@ -354,6 +363,9 @@ def _instrument_symbolic_tensors(self,
           continue
         # Except in V1 graph mode + control flow, debug_identity_v2 triggers
         # auto control dependency because it's a stateful op.
+        with self._symbolic_tensor_counter_lock:
+          debug_identity_name = ("DebugIdentityV2_%d" %
+                                 self._symbolic_tensor_counter)
         debug_tensor = gen_debug_ops.debug_identity_v2(
             # Use an empty (shape=[0]) float32 tensor for the NO_TENSOR mode
             # as a low-overhead placeholder, since no actual tensor value is
@@ -363,7 +375,8 @@ def _instrument_symbolic_tensors(self,
             op_name=op_name,
             output_slot=output_slot,
             tensor_debug_mode=self._tensor_debug_mode,
-            debug_urls=debug_urls)
+            debug_urls=debug_urls,
+            name=debug_identity_name)
         if is_v1_graph_mode:
           instrumented_tensors.append(self._process_v1_graph_mode_tensor(
               op_type, tensor, debug_tensor, tensor_debug_mode))
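
The two hunks above derive the DebugIdentityV2 op's name from
`_symbolic_tensor_counter` while holding its lock, so that concurrently
instrumented tensors never race to the same name. Below is a minimal,
hypothetical sketch of that counter-under-lock naming scheme (in the real
callback the counter is advanced as part of tensor-ID assignment rather than
by the naming code itself):

import threading

class NameCounter:
  """Generates "<prefix>_<N>" names safely across threads."""

  def __init__(self):
    self._counter = 0
    self._lock = threading.Lock()

  def next_name(self, prefix):
    with self._lock:
      name = "%s_%d" % (prefix, self._counter)
      self._counter += 1
      return name

names = NameCounter()
print(names.next_name("DebugIdentityV2"))  # DebugIdentityV2_0
print(names.next_name("tfdbg_identity"))   # tfdbg_identity_1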
@@ -537,13 +550,12 @@ def callback(self,
       output_tensor_ids = self._get_symbolic_tensor_ids(len(outputs))
       if op_type in ("Const", "Placeholder", "PlaceholderWithDefault"):
         # In some cases, the op name of a Const or Placeholder op in a graph
-        # can be duplicate (e.g., with the name "resource").
-        # When this happens, we give the op a debugger-generated name
-        # in order to prevent problems and check failures down the pipe.
-        op_name = "%s_%d" % (op_name, self._symbolic_tensor_counter)
+        # can be duplicate (e.g., `None` or "resource").
+        # When this happens, we use the output tensor name to infer
+        # the non-duplicated tensor name.
+        op_name = outputs[0].name.split(":")[0]
       if is_v1_graph_mode:
         for input_tensor in inputs:
-          # TODO(cais):
           if input_tensor in self._placeholder_to_debug_tensor and outputs:
             outputs[0].op._add_control_input(  # pylint: disable=protected-access
                 self._placeholder_to_debug_tensor[input_tensor].op)
@@ -552,7 +564,9 @@ def callback(self,
           op_name=op_name,
           graph_name=graph.name if hasattr(graph, "name") else None,
           graph_id=context_id,
-          input_names=[input_tensor.name for input_tensor in inputs],
+          input_names=[
+              self._lookup_tensor_name(input_tensor) for input_tensor in inputs
+          ],
           num_outputs=len(outputs),
           output_tensor_ids=output_tensor_ids,
           code_location=self._process_stack_frames())
@@ -577,6 +591,22 @@ def callback(self,
         outputs, op_type, input_ids, output_tensor_device_ids,
         graph_id=context_id))
 
+  def _lookup_tensor_name(self, tensor):
+    """Look up the name of a graph tensor.
+
+    This method maps the name of a debugger-generated Identity or
+    DebugIdentityV2 tensor to the name of the original instrumented tensor,
+    if `tensor` is such a debugger-created tensor.
+    Otherwise, it returns the name of `tensor` as is.
+
+    Args:
+      tensor: The graph tensor to look up the name for.
+
+    Returns:
+      Name of the original instrumented tensor as known to the debugger.
+    """
+    return self._tensor_aliases.get(tensor.name, tensor.name)
+
   def _func_graph_id_from_func_name(self, op_type):
     """Attempt to get the ID of a FuncGraph based on an op type name.
45 changes: 45 additions & 0 deletions tensorflow/python/debug/lib/dumping_callback_test.py
@@ -1386,6 +1386,51 @@ def iterative_doubling(x, times):
       # The Mul and Sub ops are from the same innermost context.
       self.assertEqual(mul_op_digest.graph_id, sub_op_digest.graph_id)
 
+  @parameterized.named_parameters(
+      ("NoTensor", "NO_TENSOR"),
+      ("Shape", "SHAPE"),
+      ("FullTensor", "FULL_TENSOR"),
+  )
+  @test_util.run_in_graph_and_eager_modes
+  def testGraphInputTracingWorksWithConstAndPlaceholderTensors(
+      self, tensor_debug_mode):
+    writer = dumping_callback.enable_dump_debug_info(
+        self.dump_root, tensor_debug_mode=tensor_debug_mode)
+
+    @def_function.function
+    def func(x):
+      return (x + constant_op.constant(4.0)) / x
+
+    x = constant_op.constant(2.0)
+    self.assertAllClose(self.evaluate(func(x)), 3.0)
+    writer.FlushNonExecutionFiles()
+    writer.FlushExecutionFiles()
+
+    with debug_events_reader.DebugDataReader(self.dump_root) as reader:
+      reader.update()
+      graph_op_digests = reader.graph_op_digests()
+      placeholder_op_name = None
+      const_op_name = None
+      add_op_name = None
+      div_op_name = None
+      for op_digest in graph_op_digests:
+        if op_digest.op_type == "Placeholder":
+          placeholder_op_name = op_digest.op_name
+        elif op_digest.op_type == "Const":
+          const_op_name = op_digest.op_name
+        elif op_digest.op_type == "AddV2":
+          add_op_name = op_digest.op_name
+          self.assertLen(op_digest.input_names, 2)
+          self.assertEqual(op_digest.input_names[0], placeholder_op_name + ":0")
+          self.assertEqual(op_digest.input_names[1], const_op_name + ":0")
+        elif op_digest.op_type == "RealDiv":
+          div_op_name = op_digest.op_name
+          self.assertLen(op_digest.input_names, 2)
+          self.assertEqual(op_digest.input_names[0], add_op_name + ":0")
+          self.assertEqual(op_digest.input_names[1], placeholder_op_name + ":0")
+      self.assertTrue(add_op_name)
+      self.assertTrue(div_op_name)
+
 
 if __name__ == "__main__":
   ops.enable_eager_execution()
