Create basic server coverage and model tests.

PiperOrigin-RevId: 626653884
tensorflow · May 14, 2024 · 8f02b96 · 8f02b96
1 parent 52911fb
commit 8f02b96
Show file tree

Hide file tree

Showing 5 changed files with 10 additions and 14 deletions.
diff --git a/tensorflow/compiler/mlir/quantization/stablehlo/python/BUILD b/tensorflow/compiler/mlir/quantization/stablehlo/python/BUILD
@@ -30,6 +30,7 @@ package(
 pytype_strict_library(
     name = "quantization",
     srcs = ["quantization.py"],
+    visibility = ["//visibility:public"],
     deps = [
         ":pywrap_quantization",
         "//tensorflow/compiler/mlir/quantization/stablehlo:quantization_config_proto_py",
@@ -45,6 +46,10 @@ pytype_strict_library(
 #     testonly = 1,
 #     srcs = ["integration_test/quantize_model_test_base.py"],
 #     tags = ["no_pip"],
+#     visibility = [
+#         "//learning/brain/mlir/quantization/stablehlo:__subpackages__",
+#         "//tensorflow/compiler/mlir/quantization:__subpackages__",
+#     ],
 #     deps = [
 #         "//third_party/py/mlir:ir",
 #         "//third_party/py/mlir:stablehlo_dialect",

diff --git a/tensorflow/core/tfrt/ifrt/sharding_utils.cc b/tensorflow/core/tfrt/ifrt/sharding_utils.cc
@@ -174,8 +174,8 @@ SplitAndCreateArraysFromHostBuffer(
                   kImmutableUntilTransferCompletes,
               [tensor, slice_idx]() {
                 // Keep tensor alive
-                LOG(INFO) << "Done with host buffer for slice " << slice_idx
-                          << " at " << tensor.data();
+                VLOG(2) << "Done with host buffer for slice " << slice_idx
+                        << " at " << tensor.data();
               }));
       arrays.push_back(std::move(array));
       device_iter++;

diff --git a/third_party/xla/xla/debug_options_flags.cc b/third_party/xla/xla/debug_options_flags.cc
@@ -147,7 +147,6 @@ DebugOptions DefaultDebugOptionsIgnoringFlags() {
   opts.set_xla_gpu_redzone_padding_bytes(8 * 1024 * 1024);
   opts.set_xla_gpu_shape_checks(DebugOptions::RUNTIME);
   opts.set_xla_gpu_normalize_layouts(true);
-  opts.set_xla_gpu_simplify_all_fp_conversions(true);
   opts.set_xla_dump_latency_hiding_schedule(false);
   opts.set_xla_gpu_enable_latency_hiding_scheduler(false);
   opts.set_xla_gpu_lhs_enable_gpu_async_tracker(true);
@@ -1229,11 +1228,6 @@ void MakeDebugOptionsFlags(std::vector<tsl::Flag>* flag_list,
       "Amount of padding the redzone allocator will put on one side of each "
       "buffer it allocates. (So the buffer's total size will be increased by "
       "2x this value.)"));
-  flag_list->push_back(tsl::Flag(
-      "xla_gpu_simplify_all_fp_conversions",
-      bool_setter_for(&DebugOptions::set_xla_gpu_simplify_all_fp_conversions),
-      debug_options->xla_gpu_simplify_all_fp_conversions(),
-      "Allows any chain of floating-point conversions to be simplified."));
   flag_list->push_back(tsl::Flag(
       "xla_gpu_shape_checks", setter_for_xla_gpu_shape_checks,
       DebugOptions::ShapeChecks_Name(debug_options->xla_gpu_shape_checks()),

diff --git a/third_party/xla/xla/service/gpu/gpu_compiler.cc b/third_party/xla/xla/service/gpu/gpu_compiler.cc
@@ -1324,8 +1324,7 @@ absl::Status GpuCompiler::OptimizeHloPostLayoutAssignment(
     sub_pipeline.AddPass<FloatNormalization>(&f8e5m2fnuz_support);
     sub_pipeline.AddPass<FloatNormalization>(&f8e4m3fnuz_support);
     // Remove `f32 -> bf16 -> f32` casts inserted by bf16 normalization.
-    if (debug_options.xla_allow_excess_precision() &&
-        debug_options.xla_gpu_simplify_all_fp_conversions()) {
+    if (debug_options.xla_allow_excess_precision()) {
       sub_pipeline.AddPass<SimplifyFPConversions>();
     }
   };
@@ -1466,8 +1465,7 @@ absl::Status GpuCompiler::OptimizeHloPostLayoutAssignment(
   pipeline.AddPass<HloPassFix<GpuAlgebraicSimplifier>>(simplifier_options,
                                                        gpu_version);
 
-  if (debug_options.xla_allow_excess_precision() &&
-      debug_options.xla_gpu_simplify_all_fp_conversions()) {
+  if (debug_options.xla_allow_excess_precision()) {
     // This pass cleans up chains of compiler-generated converts
     // (i.e. f32 -> bf16 -> f32) that have been produced by the algebraic
     // simplifier by rearranging ops (i.e. by pushing broadcasts towards the

diff --git a/third_party/xla/xla/xla.proto b/third_party/xla/xla/xla.proto
@@ -519,8 +519,7 @@ message DebugOptions {
   // scratch), so this can be multiplied by quite a lot.
   int64 xla_gpu_redzone_padding_bytes = 228;
 
-  // Deprecated. Use xla_allow_excess_precision instead.
-  bool xla_gpu_simplify_all_fp_conversions = 168 [deprecated = true];
+  reserved 168;  // Was xla_gpu_simplify_all_fp_conversions.
 
   // An experimental option to force all layouts present in the
   // after-optimizations HLO to be descending, e.g.