[go: nahoru, domu]

Skip to content

Commit

Permalink
Create basic server coverage and model tests.
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 626653884
  • Loading branch information
tensorflower-gardener committed May 14, 2024
1 parent 52911fb commit 8f02b96
Show file tree
Hide file tree
Showing 5 changed files with 10 additions and 14 deletions.
5 changes: 5 additions & 0 deletions tensorflow/compiler/mlir/quantization/stablehlo/python/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ package(
pytype_strict_library(
name = "quantization",
srcs = ["quantization.py"],
visibility = ["//visibility:public"],
deps = [
":pywrap_quantization",
"//tensorflow/compiler/mlir/quantization/stablehlo:quantization_config_proto_py",
Expand All @@ -45,6 +46,10 @@ pytype_strict_library(
# testonly = 1,
# srcs = ["integration_test/quantize_model_test_base.py"],
# tags = ["no_pip"],
# visibility = [
# "//learning/brain/mlir/quantization/stablehlo:__subpackages__",
# "//tensorflow/compiler/mlir/quantization:__subpackages__",
# ],
# deps = [
# "//third_party/py/mlir:ir",
# "//third_party/py/mlir:stablehlo_dialect",
Expand Down
4 changes: 2 additions & 2 deletions tensorflow/core/tfrt/ifrt/sharding_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -174,8 +174,8 @@ SplitAndCreateArraysFromHostBuffer(
kImmutableUntilTransferCompletes,
[tensor, slice_idx]() {
// Keep tensor alive
LOG(INFO) << "Done with host buffer for slice " << slice_idx
<< " at " << tensor.data();
VLOG(2) << "Done with host buffer for slice " << slice_idx
<< " at " << tensor.data();
}));
arrays.push_back(std::move(array));
device_iter++;
Expand Down
6 changes: 0 additions & 6 deletions third_party/xla/xla/debug_options_flags.cc
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,6 @@ DebugOptions DefaultDebugOptionsIgnoringFlags() {
opts.set_xla_gpu_redzone_padding_bytes(8 * 1024 * 1024);
opts.set_xla_gpu_shape_checks(DebugOptions::RUNTIME);
opts.set_xla_gpu_normalize_layouts(true);
opts.set_xla_gpu_simplify_all_fp_conversions(true);
opts.set_xla_dump_latency_hiding_schedule(false);
opts.set_xla_gpu_enable_latency_hiding_scheduler(false);
opts.set_xla_gpu_lhs_enable_gpu_async_tracker(true);
Expand Down Expand Up @@ -1229,11 +1228,6 @@ void MakeDebugOptionsFlags(std::vector<tsl::Flag>* flag_list,
"Amount of padding the redzone allocator will put on one side of each "
"buffer it allocates. (So the buffer's total size will be increased by "
"2x this value.)"));
flag_list->push_back(tsl::Flag(
"xla_gpu_simplify_all_fp_conversions",
bool_setter_for(&DebugOptions::set_xla_gpu_simplify_all_fp_conversions),
debug_options->xla_gpu_simplify_all_fp_conversions(),
"Allows any chain of floating-point conversions to be simplified."));
flag_list->push_back(tsl::Flag(
"xla_gpu_shape_checks", setter_for_xla_gpu_shape_checks,
DebugOptions::ShapeChecks_Name(debug_options->xla_gpu_shape_checks()),
Expand Down
6 changes: 2 additions & 4 deletions third_party/xla/xla/service/gpu/gpu_compiler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1324,8 +1324,7 @@ absl::Status GpuCompiler::OptimizeHloPostLayoutAssignment(
sub_pipeline.AddPass<FloatNormalization>(&f8e5m2fnuz_support);
sub_pipeline.AddPass<FloatNormalization>(&f8e4m3fnuz_support);
// Remove `f32 -> bf16 -> f32` casts inserted by bf16 normalization.
if (debug_options.xla_allow_excess_precision() &&
debug_options.xla_gpu_simplify_all_fp_conversions()) {
if (debug_options.xla_allow_excess_precision()) {
sub_pipeline.AddPass<SimplifyFPConversions>();
}
};
Expand Down Expand Up @@ -1466,8 +1465,7 @@ absl::Status GpuCompiler::OptimizeHloPostLayoutAssignment(
pipeline.AddPass<HloPassFix<GpuAlgebraicSimplifier>>(simplifier_options,
gpu_version);

if (debug_options.xla_allow_excess_precision() &&
debug_options.xla_gpu_simplify_all_fp_conversions()) {
if (debug_options.xla_allow_excess_precision()) {
// This pass cleans up chains of compiler-generated converts
// (i.e. f32 -> bf16 -> f32) that have been produced by the algebraic
// simplifier by rearranging ops (i.e. by pushing broadcasts towards the
Expand Down
3 changes: 1 addition & 2 deletions third_party/xla/xla/xla.proto
Original file line number Diff line number Diff line change
Expand Up @@ -519,8 +519,7 @@ message DebugOptions {
// scratch), so this can be multiplied by quite a lot.
int64 xla_gpu_redzone_padding_bytes = 228;

// Deprecated. Use xla_allow_excess_precision instead.
bool xla_gpu_simplify_all_fp_conversions = 168 [deprecated = true];
reserved 168; // Was xla_gpu_simplify_all_fp_conversions.

// An experimental option to force all layouts present in the
// after-optimizations HLO to be descending, e.g.
Expand Down

0 comments on commit 8f02b96

Please sign in to comment.