[go: nahoru, domu]

Skip to content

Commit

Permalink
Automated rollback of changelist 450795394
Browse files (browse the repository at this point in the history)
PiperOrigin-RevId: 456418896
  • Loading branch information
zwestrick authored and tfx-copybara committed Jun 22, 2022
1 parent ff9542b commit a4290ae
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 28 deletions.
2 changes: 2 additions & 0 deletions tensorflow_data_validation/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
"""Init module for TensorFlow Data Validation."""

# Import stats API.
from tensorflow_data_validation.api.stats_api import default_sharded_output_suffix
from tensorflow_data_validation.api.stats_api import default_sharded_output_supported
from tensorflow_data_validation.api.stats_api import GenerateStatistics
from tensorflow_data_validation.api.stats_api import MergeDatasetFeatureStatisticsList
from tensorflow_data_validation.api.stats_api import WriteStatisticsToBinaryFile
Expand Down
34 changes: 6 additions & 28 deletions tensorflow_data_validation/api/stats_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,33 +215,11 @@ def expand(self, stats: beam.PCollection) -> beam.pvalue.PDone:
self._binary_proto_path))


# TODO(b/450795394): Delete this once it's safe to directly call the above.
@beam.typehints.with_input_types(statistics_pb2.DatasetFeatureStatisticsList)
@beam.typehints.with_output_types(beam.pvalue.PDone)
class WriteStatisticsBinaryAndMaybeRecords(beam.PTransform):
"""Writes binary stats and sharded stats with default format if supported.
def default_sharded_output_supported() -> bool:
  """Reports whether sharded statistics output is supported by default.

  Returns:
    The value returned by `statistics_io_impl.should_write_sharded()`.
  """
  sharded_by_default = statistics_io_impl.should_write_sharded()
  return sharded_by_default

Currently Experimental.
"""

def __init__(
self,
binary_proto_path: str,
records_path_prefix_no_suffix: str,
) -> None:
"""Initialize WriteStatisticsBinaryAndMaybeRecords.
Args:
binary_proto_path: Output path for writing statistics as a binary proto.
records_path_prefix_no_suffix: File pattern for writing statistics to
sharded records. An appropriate file type suffix (e.g., .tfrecords) and
shard numbers will be added.
"""
if statistics_io_impl.should_write_sharded():
io_provider = statistics_io_impl.get_io_provider()
records_path_prefix = (
records_path_prefix_no_suffix + io_provider.file_suffix())
self._output_transform = WriteStatisticsToRecordsAndBinaryFile(
binary_proto_path, records_path_prefix, io_provider)
else:
self._output_transform = WriteStatisticsToBinaryFile(binary_proto_path)
def default_sharded_output_suffix() -> str:
  """Returns the file suffix used for sharded output by default.

  Returns:
    The suffix reported by `file_suffix()` on the provider obtained from
    `statistics_io_impl.get_io_provider()`.
  """
  io_provider = statistics_io_impl.get_io_provider()
  return io_provider.file_suffix()
3 changes: 3 additions & 0 deletions tensorflow_data_validation/utils/stats_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -656,6 +656,9 @@ def load_sharded_statistics(
io_provider = statistics_io_impl.get_io_provider()
if input_path_prefix is not None:
input_paths = io_provider.glob(input_path_prefix)
if not input_paths:
raise ValueError('No input paths found paths=%s, pattern=%s' %
(input_paths, input_path_prefix))
acc = statistics.DatasetListAccumulator()
stats_iter = io_provider.record_iterator_impl(input_paths)
for stats_list in stats_iter:
Expand Down

0 comments on commit a4290ae

Please sign in to comment.